def test_get_roi_mask(self):
        """Test get_roi_mask()."""
        slide_annotations = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

        # read ground truth codes and information
        GTCodes = read_csv(GTCODE_PATH)
        GTCodes.index = GTCodes.loc[:, 'group']

        # get indices of rois
        idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes,
                                                   element_infos=element_infos)

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            GTCodes_df=GTCodes.copy(),
            idx_for_roi=idxs_for_all_rois[0],  # <- let's focus on the first ROI
            iou_thresh=0.0,
            roiinfo=None,
            crop_to_roi=True,
            use_shapely=True,
            verbose=False,
            monitorPrefix="roi 1")

        self.assertTupleEqual(ROI.shape, (4594, 4542))
        self.assertTupleEqual(
            (roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH'], roiinfo['XMIN'],
             roiinfo['XMAX'], roiinfo['YMIN'], roiinfo['YMAX']),
            (4820, 7006, 59206, 66212, 33505, 38325))
Example #2
def annotation_info(gc, item_id, magnification=None):
    """Get item HistomicsUI annotations and element information.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        item id
    magnification : float (optional)
        if given, annotations will be scaled to match

    Returns
    -------
    annotations : list
        annotation documents information
    element_info : DataFrame
        annotation element information

    """
    # get item annotations
    annotations = gc.get('/annotation/item/' + item_id)

    if magnification is not None:
        # get factor for scaling annotations to given magnification
        mask_mag_factor, _ = get_scale_factor_and_appendStr(gc=gc, slide_id=item_id, MAG=magnification)

        # scale annotations
        annotations = scale_slide_annotations(annotations, sf=mask_mag_factor)

    # get annotation element information (i.e. centers)
    element_info = get_bboxes_from_slide_annotations(annotations)

    return annotations, element_info
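
A minimal usage sketch for annotation_info() above. APIURL, API_KEY, and ITEM_ID are placeholders, and the girder client setup mirrors the pattern used elsewhere in these examples:

# hypothetical usage -- replace APIURL, API_KEY, and ITEM_ID with real values
import girder_client

gc = girder_client.GirderClient(apiUrl=APIURL)
gc.authenticate(apiKey=API_KEY)

# fetch annotations rescaled to 10x and inspect the element bounding boxes
annotations, element_info = annotation_info(gc, ITEM_ID, magnification=10.0)
print(element_info[['group', 'xmin', 'xmax', 'ymin', 'ymax']].head())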
Example #3
def test_prep(girderClient):  # noqa

    cfg.gc = girderClient

    cfg.iteminfo = cfg.gc.get('/item', parameters={
        'text': "TCGA-A2-A0YE-01Z-00-DX1"})[0]

    # read GTCodes dataframe
    gtcodePath = getTestFilePath('sample_GTcodes.csv')
    cfg.GTcodes = read_csv(gtcodePath)
    cfg.GTcodes.index = cfg.GTcodes.loc[:, 'group']

    # other params
    cfg.get_roi_mask_kwargs = {
        'iou_thresh': 0.0,
        'crop_to_roi': True,
        'use_shapely': True,
        'verbose': False
    }
    cfg.get_contours_kwargs = {
        'groups_to_get': None,
        'roi_group': 'roi',
        'get_roi_contour': True,
        'discard_nonenclosed_background': True,
        'background_group': 'mostly_stroma',
        'MIN_SIZE': 10, 'MAX_SIZE': None,
        'verbose': False, 'monitorPrefix': ""
    }

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # get annotations for slide
    cfg.slide_annotations = cfg.gc.get('/annotation/item/' + cfg.iteminfo['_id'])

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=cfg.iteminfo['_id'], MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(cfg.slide_annotations, sf=sf)

    # get bounding box information for all annotations
    cfg.element_infos = get_bboxes_from_slide_annotations(cfg.slide_annotations)

    # params for get_image_and_mask_from_slide()
    cfg.get_kwargs = {
        'gc': cfg.gc, 'slide_id': cfg.iteminfo['_id'],
        'GTCodes_dict': cfg.GTcodes.T.to_dict(),
        'bounds': {
            'XMIN': 58000, 'XMAX': 63000,
            'YMIN': 35000, 'YMAX': 39000},
        'MPP': cfg.MPP,
        'MAG': cfg.MAG,
        'get_roi_mask_kwargs': cfg.get_roi_mask_kwargs,
        'get_contours_kwargs': cfg.get_contours_kwargs,
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }
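
The kwargs assembled above would typically be consumed by a downstream test along these lines. This is a sketch only; the test name is hypothetical and it assumes get_image_and_mask_from_slide is importable from histomicstk.annotations_and_masks.annotations_to_masks_handler:

def test_get_image_and_mask_from_slide_manual_bounds():
    # sketch: run the wrapper in 'manual_bounds' mode using the prepped cfg
    roi_out = get_image_and_mask_from_slide(
        mode='manual_bounds',
        slide_annotations=cfg.slide_annotations,
        element_infos=cfg.element_infos,
        **cfg.get_kwargs)
    assert set(roi_out.keys()) >= {'bounds', 'ROI', 'rgb', 'visualization'}
    assert roi_out['ROI'].ndim == 2 and roi_out['rgb'].ndim == 3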
Example #4
    def test_get_bboxes_from_slide_annotations(self):
        """Test get_bboxes_from_slide_annotations."""
        slide_annotations = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

        self.assertTupleEqual(element_infos.shape, (76, 9))
        self.assertTupleEqual(tuple(element_infos.columns),
                              (('annidx', 'elementidx', 'type', 'group',
                                'xmin', 'xmax', 'ymin', 'ymax', 'bbox_area')))
Example #5
    def test_get_bboxes_from_slide_annotations(self):
        """Test get_bboxes_from_slide_annotations."""

        element_infos = get_bboxes_from_slide_annotations(
            copy.deepcopy(cfg.annotations))

        assert element_infos.shape == (76, 9)
        assert set(element_infos.columns) == {
            'annidx', 'elementidx', 'type', 'group', 'xmin', 'xmax', 'ymin',
            'ymax', 'bbox_area'
        }
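
Since element_infos is an ordinary pandas DataFrame, downstream filtering is plain indexing. A small sketch, assuming the slide has elements in a group named 'mostly_tumor' (the group name here is illustrative):

# sketch: restrict to one group and compute its combined bounding box
tumor_infos = element_infos.loc[element_infos.loc[:, 'group'] == 'mostly_tumor', :]
xmin, ymin = tumor_infos['xmin'].min(), tumor_infos['ymin'].min()
xmax, ymax = tumor_infos['xmax'].max(), tumor_infos['ymax'].max()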
Example #6
def get_all_rois_from_slide(
    gc,
    slide_id,
    GTCodes_dict,
    save_directories,
    get_image_and_mask_from_slide_kwargs=None,
    slide_name=None,
    verbose=True,
    monitorPrefix="",
):
    """Parse annotations and saves ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to
    for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    get_image_and_mask_from_slide_kwargs : dict
        kwargs to pass to get_image_and_mask_from_slide()
        default values are assigned if specific parameters are not given.

    slide_name : str or None
        If not given, it is inferred via a server request using the girder client.

    verbose : bool
        Print progress to screen?

    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    list of dicts
        each entry contains the following keys
        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    """
    # assign defaults if nothing given
    default_keyvalues = {
        'MPP': 5.0,
        'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0,
            'crop_to_roi': True,
            'use_shapely': True,
            'verbose': False
        },
        'get_contours_kwargs': {
            'groups_to_get': None,
            'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10,
            'MAX_SIZE': None,
            'verbose': False,
            'monitorPrefix': ""
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }

    kvp = get_image_and_mask_from_slide_kwargs or {}  # for easy referencing
    for k, v in default_keyvalues.items():
        if k not in kvp.keys():
            kvp[k] = v

    # convert to df and sanity check
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(GTCodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(gc=gc,
                                           slide_id=slide_id,
                                           MPP=kvp['MPP'],
                                           MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes_df,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorPrefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get specified area
        roi_out = get_image_and_mask_from_slide(
            gc=gc,
            slide_id=slide_id,
            GTCodes_dict=GTCodes_dict,
            mode='polygonal_bounds',
            idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            **kvp)

        # now save roi (mask, rgb, contours, vis)

        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name, roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['ROI', 'rgb', 'visualization']:
            if imtype in roi_out.keys():
                savename = os.path.join(save_directories[imtype],
                                        ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s\n" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        if 'contours' in roi_out.keys():
            savename = os.path.join(save_directories['contours'],
                                    ROINAMESTR + ".csv")
            if verbose:
                print("%s: Saving %s\n" % (roicountStr, savename))
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
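
A hedged usage sketch for get_all_rois_from_slide(). The girder client setup follows the pattern in the docstring; SLIDE_ID is a placeholder, and the group names, GT codes, and colors below are illustrative only (they mirror the structure documented for GTCodes_dict):

# hypothetical usage -- replace APIURL, API_KEY, and SLIDE_ID with real values
import os
import tempfile
import girder_client

gc = girder_client.GirderClient(apiUrl=APIURL)
gc.authenticate(apiKey=API_KEY)

GTCodes_dict = {
    'roi': {'group': 'roi', 'overlay_order': 0, 'GT_code': 254,
            'is_roi': 1, 'is_background_class': 0, 'color': 'rgb(200,0,150)'},
    'mostly_tumor': {'group': 'mostly_tumor', 'overlay_order': 1, 'GT_code': 1,
                     'is_roi': 0, 'is_background_class': 0, 'color': 'rgb(255,0,0)'},
    'mostly_stroma': {'group': 'mostly_stroma', 'overlay_order': 2, 'GT_code': 2,
                      'is_roi': 0, 'is_background_class': 1, 'color': 'rgb(255,255,0)'},
}

base = tempfile.mkdtemp()
save_directories = {
    k: os.path.join(base, k) for k in ['ROI', 'rgb', 'contours', 'visualization']}
for d in save_directories.values():
    os.mkdir(d)

savenames = get_all_rois_from_slide(
    gc=gc, slide_id=SLIDE_ID, GTCodes_dict=GTCodes_dict,
    save_directories=save_directories,
    get_image_and_mask_from_slide_kwargs={'MPP': 5.0},
    slide_name='example-slide', verbose=False)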
Example #7
def get_image_and_mask_from_slide(gc,
                                  slide_id,
                                  GTCodes_dict,
                                  MPP=5.0,
                                  MAG=None,
                                  mode='min_bounding_box',
                                  bounds=None,
                                  idx_for_roi=None,
                                  slide_annotations=None,
                                  element_infos=None,
                                  get_roi_mask_kwargs=None,
                                  get_contours_kwargs=None,
                                  linewidth=0.2,
                                  get_rgb=True,
                                  get_contours=True,
                                  get_visualization=True):
    """Parse region from the slide and get its corresponding labeled mask.

    This is a wrapper around get_roi_mask() which should be referred to for
    implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification which is more scanner/manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        If neither MPP nor MAG is provided, everything is retrieved without
        scaling at base (scan) magnification.

    mode : str
        This specifies which part of the slide to get the mask from. Allowed
        modes include the following
        - wsi: get scaled up/down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up/down by sf BEFOREHAND.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained using the get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    get_contours_kwargs : dict
        extra kwargs for get_contours_from_mask()

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_contours : bool
        get annotation contours? (relative to final mask)

    get_visualization : bool
        get overlayed annotation bounds over RGB for visualization

    Returns
    --------
    dict
        Results dict containing one or more of the following keys
        - bounds: dict of bounds at scan magnification
        - ROI: (mxn) labeled image (mask)
        - rgb: (mxnx3 np array) corresponding rgb image
        - contours: list, each entry is a dict version of a row from the output
          of masks_to_annotations_handler.get_contours_from_mask()
        - visualization: (mxnx3 np array) visualization overlay

    """
    get_roi_mask_kwargs = get_roi_mask_kwargs or {}
    get_contours_kwargs = get_contours_kwargs or {}
    # important sanity checks
    (MPP, MAG, mode, bounds, idx_for_roi, get_roi_mask_kwargs, get_rgb,
     get_contours,
     get_visualization) = _sanity_checks(MPP, MAG, mode, bounds, idx_for_roi,
                                         get_roi_mask_kwargs, get_rgb,
                                         get_contours, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(gc=gc,
                                                   slide_id=slide_id,
                                                   MPP=MPP,
                                                   MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine the region to get based on the run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(gc=gc,
                                         slide_id=slide_id,
                                         mode=mode,
                                         bounds=bounds,
                                         element_infos=element_infos,
                                         idx_for_roi=idx_for_roi,
                                         sf=sf)
    result = {
        'bounds': bounds,
    }

    # get mask for specified area
    if mode == 'polygonal_bounds':
        # get roi mask and info
        ROI, _ = get_roi_mask(slide_annotations=slide_annotations,
                              element_infos=element_infos,
                              GTCodes_df=DataFrame.from_dict(GTCodes_dict,
                                                             orient='index'),
                              idx_for_roi=idx_for_roi,
                              **get_roi_mask_kwargs)
    else:
        ROI, _ = get_mask_from_slide(GTCodes_dict=GTCodes_dict,
                                     roiinfo=copy.deepcopy(bounds),
                                     slide_annotations=slide_annotations,
                                     element_infos=element_infos,
                                     sf=sf,
                                     get_roi_mask_kwargs=get_roi_mask_kwargs)

    # get RGB
    if get_rgb:
        rgb, ROI = _get_rgb_and_pad_roi(gc=gc,
                                        slide_id=slide_id,
                                        bounds=bounds,
                                        appendStr=appendStr,
                                        ROI=ROI)
        result['rgb'] = rgb

    # pack result (we have to do it here in case of padding)
    result['ROI'] = ROI

    # get contours
    if get_contours:
        contours_list = get_contours_from_mask(MASK=ROI,
                                               GTCodes_df=DataFrame.from_dict(
                                                   GTCodes_dict,
                                                   orient='index'),
                                               **get_contours_kwargs)
        contours_list = contours_list.to_dict(orient='records')
        result['contours'] = contours_list

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth)

    return result
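
A hedged sketch of calling get_image_and_mask_from_slide() in 'wsi' mode to get a low-magnification labeled mask of the whole slide; gc, SLIDE_ID, and GTCodes_dict are assumed to be set up as in the earlier sketches:

# sketch: whole-slide labeled mask plus RGB at roughly 1.25x magnification
out = get_image_and_mask_from_slide(
    gc=gc, slide_id=SLIDE_ID, GTCodes_dict=GTCodes_dict,
    MPP=None, MAG=1.25, mode='wsi',
    get_rgb=True, get_contours=False, get_visualization=False)
wsi_mask, wsi_rgb = out['ROI'], out['rgb']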
Example #8
def get_all_rois_from_slide_v2(gc,
                               slide_id,
                               GTCodes_dict,
                               save_directories,
                               annotations_to_contours_kwargs=None,
                               mode='object',
                               get_mask=True,
                               slide_name=None,
                               verbose=True,
                               monitorprefix="",
                               callback=None,
                               callback_kwargs=None):
    """Get all ROIs for a slide without an intermediate mask form.

    This mainly relies on contours_to_labeled_object_mask(), which should
    be referred to for extra documentation.

    This can be run in either the "object" mode, whereby the saved masks
    are a three-channel png where first channel encodes class label (i.e.
    same as semantic segmentation) and the product of the values in the
    second and third channel encodes the object ID. Otherwise, the user
    may decide to run in the "semantic" mode and the resultant mask would
    consist of only one channel (semantic segmentation with no object
    differentiation).

    The difference between this and version 1, found at
    histomicstk.annotations_and_masks.annotations_to_masks_handler.
    get_all_rois_from_slide()
    is that this (version 2) gets the contours first, including cropping
    to wanted ROI boundaries and other processing using shapely, and THEN
    parses these into masks. This enables us to differentiate various objects
    to use the data for object localization or classification or segmentation
    tasks. If you would like to get semantic segmentation masks, i.e. you do
    not really care about individual objects, you can use either version 1
    or this method. They re-use much of the same code-base, but some edge
    cases may be better handled by version 1. For example, since
    this version uses shapely first to crop, some objects may be incorrectly
    parsed by shapely. Version 1, using PIL.ImageDraw may not have these
    problems.

    Bottom line is: if you need semantic segmentation masks, it is probably
    safer to use version 1, whereas if you need object segmentation masks,
    this method should be used.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    mode : str
        run mode for getting masks. Must be one of
        - object: get 3-channel mask where first channel encodes label
        (tumor, stroma, etc) while product of second and third
        channel encodes the object ID (i.e. individual contours)
        This is useful for object localization and segmentation tasks.
        - semantic: get a 1-channel mask corresponding to the first channel
        of the object mode.

    get_mask : bool
        While the main purpose of this method IS to get object segmentation
        masks, it is conceivable that some users might just want to get
        the RGB and contours. Default is True.

    annotations_to_contours_kwargs : dict
        kwargs to pass to annotations_to_contours_no_mask()
        default values are assigned if specific parameters are not given.

    slide_name : str or None
        If not given, it is inferred via a server request using the girder client.

    verbose : bool
        Print progress to screen?

    monitorprefix : str
        text to prepend to printed statements

    callback : function
        a callback function to run on the roi dictionary output. This is
        internal, but if you really want to use this, make sure the callback
        can accept the following keys and that you do NOT assign them yourself
        gc, slide_id, slide_name, MPP, MAG, verbose, monitorprefix
        Also, this callback MUST *ONLY* return the roi dictionary, whether
        or not it is modified inside it. If it is modified inside the callback
        then the modified version is the one that will be saved to disk.

    callback_kwargs : dict
        kwargs to pass to callback, not including the mandatory kwargs
        that will be passed internally (mentioned earlier here).

    Returns
    --------
    list of dicts
        each entry contains the following keys
        - mask: path to saved mask
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    """
    default_keyvalues = {
        'MPP': None,
        'MAG': None,
        'linewidth': 0.2,
        'get_rgb': True,
        'get_visualization': True,
    }

    # assign defaults if nothing given
    kvp = annotations_to_contours_kwargs or {}  # for easy referencing
    for k, v in default_keyvalues.items():
        if k not in kvp.keys():
            kvp[k] = v

    # convert to df and sanity check
    gtcodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(gtcodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(gc=gc,
                                           slide_id=slide_id,
                                           MPP=kvp['MPP'],
                                           MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=gtcodes_df,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorprefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get specified area
        roi_out = annotations_to_contours_no_mask(
            gc=gc,
            slide_id=slide_id,
            mode='polygonal_bounds',
            idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            **kvp)

        # get corresponding mask (semantic or object)
        if get_mask:
            roi_out['mask'] = contours_to_labeled_object_mask(
                contours=DataFrame(roi_out['contours']),
                gtcodes=gtcodes_df,
                mode=mode,
                verbose=verbose,
                monitorprefix=roicountStr)

        # now run callback on roi_out
        if callback is not None:
            # these are 'compulsory' kwargs for the callback
            # since it will not have access to these otherwise
            callback_kwargs = callback_kwargs or {}
            callback_kwargs.update({
                'gc': gc,
                'slide_id': slide_id,
                'slide_name': slide_name,
                'MPP': kvp['MPP'],
                'MAG': kvp['MAG'],
                'verbose': verbose,
                'monitorprefix': roicountStr,
            })
            callback(roi_out, **callback_kwargs)

        # now save roi (rgb, vis, mask)

        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name, roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['mask', 'rgb', 'visualization']:
            if imtype in roi_out.keys():
                savename = os.path.join(save_directories[imtype],
                                        ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        # save contours
        savename = os.path.join(save_directories['contours'],
                                ROINAMESTR + ".csv")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        contours_df = DataFrame(roi_out['contours'])
        contours_df.to_csv(savename)
        this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
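
A hedged sketch of the callback mechanism described above. The callback name and its min_contours parameter are hypothetical; the compulsory kwargs (gc, slide_id, slide_name, MPP, MAG, verbose, monitorprefix) are injected internally and simply absorbed here. Note that the save_directories for this version use a 'mask' key rather than 'ROI':

# hypothetical callback -- reports the number of contours in each ROI
def _report_roi(roi_out, min_contours=1, **kwargs):
    # the internally-injected kwargs arrive through **kwargs;
    # only monitorprefix is used in this sketch
    n = len(roi_out.get('contours', []))
    if n >= min_contours:
        print("%s: %d contours in this ROI" % (kwargs.get('monitorprefix', ''), n))
    return roi_out

# sketch call -- gc, SLIDE_ID, and GTCodes_dict as in the earlier sketches
import os
import tempfile

base = tempfile.mkdtemp()
save_directories = {
    k: os.path.join(base, k) for k in ['mask', 'rgb', 'contours', 'visualization']}
for d in save_directories.values():
    os.mkdir(d)

savenames = get_all_rois_from_slide_v2(
    gc=gc, slide_id=SLIDE_ID, GTCodes_dict=GTCodes_dict,
    save_directories=save_directories, mode='object',
    callback=_report_roi, callback_kwargs={'min_contours': 1},
    verbose=False)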
Example #9
def annotations_to_contours_no_mask(gc,
                                    slide_id,
                                    MPP=5.0,
                                    MAG=None,
                                    mode='min_bounding_box',
                                    bounds=None,
                                    idx_for_roi=None,
                                    slide_annotations=None,
                                    element_infos=None,
                                    linewidth=0.2,
                                    get_rgb=True,
                                    get_visualization=True,
                                    text=True):
    """Process annotations to get RGB and contours without intermediate masks.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    MPP : float or None
        Microns-per-pixel -- best use this as it's more well-defined than
        magnification which is more scanner or manufacturer specific.
        MPP of 0.25 often roughly translates to 40x

    MAG : float or None
        If you prefer to use whatever magnification is reported in slide.
        If neither MPP nor MAG is provided, everything is retrieved without
        scaling at base (scan) magnification.

    mode : str
        This specifies which part of the slide to get the mask from. Allowed
        modes include the following
        - wsi: get scaled up or down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates

    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.

    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.

    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you do
        provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up or down by sf BEFOREHAND.

    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained using the get_bboxes_from_slide_annotations() method.
        Make sure you have used scale_slide_annotations().

    linewidth : float
        visualization line width

    get_rgb: bool
        get rgb image?

    get_visualization : bool
        get overlayed annotation bounds over RGB for visualization

    text : bool
        add text labels to visualization?

    Returns
    --------
    dict
        Results dict containing one or more of the following keys
        - bounds: dict of bounds at scan magnification
        - rgb: (mxnx3 np array) corresponding rgb image
        - contours: dict
        - visualization: (mxnx3 np array) visualization overlay

    """
    MPP, MAG, mode, bounds, idx_for_roi, get_rgb, get_visualization = \
        _sanity_checks(
            MPP, MAG, mode, bounds, idx_for_roi,
            get_rgb, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(gc=gc,
                                                   slide_id=slide_id,
                                                   MPP=MPP,
                                                   MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine the region to get based on the run mode, keeping in mind that it
    # must be at BASE MAGNIFICATION coordinates before it is passed
    # on to get_mask_from_slide()
    # if mode != 'polygonal_bound':
    bounds = _get_roi_bounds_by_run_mode(gc=gc,
                                         slide_id=slide_id,
                                         mode=mode,
                                         bounds=bounds,
                                         element_infos=element_infos,
                                         idx_for_roi=idx_for_roi,
                                         sf=sf)

    # only keep relevant elements and get uncropped bounds
    elinfos_roi, uncropped_bounds = _keep_relevant_elements_for_roi(
        element_infos,
        sf=sf,
        mode=mode,
        idx_for_roi=idx_for_roi,
        roiinfo=copy.deepcopy(bounds))

    # find relevant portion from slide annotations to use
    # (with overflowing beyond edge)
    annotations_slice = _trim_slide_annotations_to_roi(
        copy.deepcopy(slide_annotations), elinfos_roi=elinfos_roi)

    # get roi polygon vertices
    rescaled_bounds = {k: int(v * sf) for k, v in bounds.items()}
    if mode == 'polygonal_bounds':
        roi_coords = _get_coords_from_element(
            copy.deepcopy(slide_annotations[int(
                element_infos.loc[idx_for_roi,
                                  'annidx'])]['annotation']['elements'][int(
                                      element_infos.loc[idx_for_roi,
                                                        'elementidx'])]))
        cropping_bounds = None
    else:
        roi_coords = None
        cropping_bounds = rescaled_bounds

    # tabularize to use contours
    _, contours_df = parse_slide_annotations_into_tables(
        annotations_slice,
        cropping_bounds=cropping_bounds,
        cropping_polygon_vertices=roi_coords,
        use_shapely=mode in ('manual_bounds', 'polygonal_bounds'),
    )
    contours_list = contours_df.to_dict(orient='records')

    # Final bounds (relative to slide at base magnification)
    bounds = {k: int(v / sf) for k, v in rescaled_bounds.items()}
    result = dict()

    # get RGB
    if get_rgb:
        getStr = \
            "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG" \
            % (slide_id,
               bounds['XMIN'], bounds['XMAX'],
               bounds['YMIN'], bounds['YMAX'])
        getStr += appendStr
        resp = gc.get(getStr, jsonResp=False)
        rgb = get_image_from_htk_response(resp)
        result['rgb'] = rgb

    # Assign to results
    result.update({
        'contours': contours_list,
        'bounds': bounds,
    })

    # get visualization of annotations on RGB
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contours_list,
            linewidth=linewidth,
            text=text)

    return result
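
A hedged sketch calling annotations_to_contours_no_mask() directly with manual bounds; the bounds below mirror those used in the tests, and gc and SLIDE_ID are placeholders as before:

# sketch: contours, RGB, and visualization for a manually specified region
out = annotations_to_contours_no_mask(
    gc=gc, slide_id=SLIDE_ID, MPP=5.0, mode='manual_bounds',
    bounds={'XMIN': 58000, 'XMAX': 63000, 'YMIN': 35000, 'YMAX': 39000},
    get_rgb=True, get_visualization=True)
contours = out['contours']   # list of dicts, one per annotation element
vis = out['visualization']   # RGB overlay as a numpy array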
Example #10
def get_all_roi_masks_for_slide(
        input_img, input_ann, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=dict()):
    """Parse annotations and saves ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details.

    Parameters
    -----------
    input_img : object
        input large image object
    input_ann : object
        input annotation object
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name (optional) : str
        Name to use when constructing saved file names; best provided explicitly for this variant.
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    Returns
    --------
    list of strs
        save paths for ROIs

    """
    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    if any(GTCodes.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = input_ann

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations, element_infos=element_infos,
            GTCodes_df=GTCodes.copy(), idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # make directory for the mask if it does not already exist
        MASK_SAVEPATH_MASK = MASK_SAVEPATH + '/mask'
        os.makedirs(MASK_SAVEPATH_MASK, exist_ok=True)

        # now save roi
        ROINAMESTR = "%s_left-%d_top-%d" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename = os.path.join(MASK_SAVEPATH_MASK, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        region = [roiinfo['XMIN'], roiinfo['YMIN'], roiinfo['BBOX_WIDTH'], roiinfo['BBOX_HEIGHT']]
        maxRegionSize = 5000

        # make directory for the region if it does not already exist
        MASK_SAVEPATH_REG = MASK_SAVEPATH + '/region'
        os.makedirs(MASK_SAVEPATH_REG, exist_ok=True)

        # save the RGB region image
        im_input = input_img.getRegion(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **utils.get_region_dict(region, maxRegionSize, input_img))[0]

        ROINAMESTR1 = "%s_left-%d_top-%d" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename1 = os.path.join(MASK_SAVEPATH_REG, ROINAMESTR1 + ".png")
        skimage.io.imsave(savename1, im_input)
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename1))

        savenames.append(savename)

    return savenames
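
A hedged sketch for this large_image-based variant. The slide path, annotation JSON file, and GT codes csv are placeholders; input_ann is expected to have the same structure as the list returned by gc.get('/annotation/item/<id>'):

# hypothetical usage -- paths are placeholders
import json
import os
import large_image

input_img = large_image.getTileSource('/path/to/slide.svs')
with open('/path/to/annotations.json') as f:
    input_ann = json.load(f)

os.makedirs('/tmp/rois', exist_ok=True)
savenames = get_all_roi_masks_for_slide(
    input_img, input_ann,
    GTCODE_PATH='sample_GTcodes.csv', MASK_SAVEPATH='/tmp/rois',
    slide_name='example-slide',
    get_roi_mask_kwargs={'crop_to_roi': True, 'use_shapely': True})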
Example #11
def test_prep(girderClient):  # noqa

    cfg.gc = girderClient

    iteminfo = cfg.gc.get('/item', parameters={
        'text': "TCGA-A2-A0YE-01Z-00-DX1"})[0]

    # read GTCodes dataframe
    gtcodePath = getTestFilePath('sample_GTcodes.csv')
    GTCodes_dict = read_csv(gtcodePath)
    GTCodes_dict.index = GTCodes_dict.loc[:, 'group']
    GTCodes_dict = GTCodes_dict.to_dict(orient='index')

    # just a temp directory to save masks for now
    cfg.BASE_SAVEPATH = tempfile.mkdtemp()
    cfg.SAVEPATHS = {
        'contours': os.path.join(cfg.BASE_SAVEPATH, 'contours'),
        'rgb': os.path.join(cfg.BASE_SAVEPATH, 'rgbs'),
        'visualization': os.path.join(cfg.BASE_SAVEPATH, 'vis'),
        'mask': os.path.join(cfg.BASE_SAVEPATH, 'masks'),
    }
    for _, savepath in cfg.SAVEPATHS.items():
        if not os.path.exists(savepath):
            os.mkdir(savepath)

    # Microns-per-pixel / Magnification (either or)
    cfg.MPP = 5.0
    cfg.MAG = None

    # get annotations for slide
    cfg.slide_annotations = cfg.gc.get('/annotation/item/' + iteminfo['_id'])

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=cfg.gc, slide_id=iteminfo['_id'], MPP=cfg.MPP, MAG=cfg.MAG)
    cfg.slide_annotations = scale_slide_annotations(cfg.slide_annotations, sf=sf)

    # get bounding box information for all annotations
    cfg.element_infos = get_bboxes_from_slide_annotations(cfg.slide_annotations)

    # common params for annotations_to_contours_no_mask()
    annotations_to_contours_kwargs = {
        'MPP': cfg.MPP, 'MAG': cfg.MAG,
        'linewidth': 0.2,
        'get_rgb': True, 'get_visualization': True,
    }

    # params for TESTING annotations_to_contours_no_mask()
    cfg.test_annots_to_contours_kwargs = copy.deepcopy(
        annotations_to_contours_kwargs)
    cfg.test_annots_to_contours_kwargs.update({
        'gc': cfg.gc,
        'slide_id': iteminfo['_id'],
        'bounds': {
            'XMIN': 58000, 'XMAX': 63000,
            'YMIN': 35000, 'YMAX': 39000},
        })

    # params for getting all rois for slide
    cfg.get_all_rois_kwargs = {
        'gc': cfg.gc,
        'slide_id': iteminfo['_id'],
        'GTCodes_dict': GTCodes_dict,
        'save_directories': cfg.SAVEPATHS,
        'annotations_to_contours_kwargs': annotations_to_contours_kwargs,
        'slide_name': 'TCGA-A2-A0YE',
        'verbose': False,
        'monitorprefix': 'test',
    }
Example #12
def get_all_roi_masks_for_slide(gc,
                                slide_id,
                                GTCODE_PATH,
                                MASK_SAVEPATH,
                                slide_name=None,
                                verbose=True,
                                monitorPrefix="",
                                get_roi_mask_kwargs=dict()):
    """Parse annotations and saves ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name (optional) : str
        If not given, it is inferred via a server request using the girder client.
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict
        extra kwargs for get_roi_mask()

    Returns
    --------
    list of strs
        save paths for ROIs

    """
    # if not given, assign name of first file associated with item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    assert all(GTCodes.loc[:, 'GT_code'] > 0), "All GT_code must be > 0"

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorPrefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(slide_annotations=slide_annotations,
                                    element_infos=element_infos,
                                    GTCodes_df=GTCodes.copy(),
                                    idx_for_roi=idx_for_roi,
                                    monitorPrefix=roicountStr,
                                    **get_roi_mask_kwargs)

        # now save roi
        ROINAMESTR = "%s_left-%d_top-%d_mag-BASE" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename = os.path.join(MASK_SAVEPATH, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        savenames.append(savename)

    return savenames
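
A hedged call sketch for this girder-based variant; APIURL, API_KEY, SLIDE_ID, and the csv path are placeholders:

# hypothetical usage -- replace the placeholders with real values
import os
import girder_client

gc = girder_client.GirderClient(apiUrl=APIURL)
gc.authenticate(apiKey=API_KEY)

os.makedirs('/tmp/masks', exist_ok=True)
savenames = get_all_roi_masks_for_slide(
    gc=gc, slide_id=SLIDE_ID,
    GTCODE_PATH='sample_GTcodes.csv', MASK_SAVEPATH='/tmp/masks',
    get_roi_mask_kwargs={'iou_thresh': 0.0, 'crop_to_roi': True})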
Example #13
def grid_tiling(gc,
                item_id,
                group_names,
                save_dir,
                save_mag=None,
                mask_mag=1.25,
                tile_size=(224, 224),
                tissue_threshold=0.3,
                annotation_threshold=0.15,
                random_seed=64,
                is_test=False,
                oversample_background=2.0,
                reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally grouped by annotations. This approach grids the
    image into equal sized small images, or tiles (i.e. a grid is placed over the WSI starting at the top left corner).
    At the bottom and right edge of the WSI the tiles are ignored if not of correct size (the case where the WSI
    dimensions are not a multiple factor of the tile size). A list of annotation group names are needed to group the
    tiles into classes of images saved in their own directories. Tiles with no tissue detected are ignored and tiles not
    containing annotations (but have tissue) are by default saved into background class. A background annotation group
    will cause issues so avoid having this annotation group name.

    Tiles can be saved at a lower magnification than source image if needed (param: save_mag). Note that tiles size
    specified should be the tile size at the save magnification not the source magnification. Image saved will be of the
    tile size specified in parameters, regardless of the save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images - save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations. Note that if your annotations are very small
        it may help to use a larger value than the default here, but binary mask creation will fail at very high
        magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at; note that this is the size they will be saved at regardless of the
        magnification used to save the images (i.e. if save_mag is 4 times lower than the source magnification, then
        the actual tile_size will represent 4 times the pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, percentage of tile that must contain tissue to be included
    annotation_threshold : float (optional)
        from 0 to 1, percentage of tile that must contain annotation (per group) to be labeled as annotation. Note
        that a single tile may be saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise oversample_background will be used to determine
        how many background regions to save
    oversample_background : float (optional)
        factor by which to oversample background class images, relative to the annotation class with the most
        images saved
    reinhard_stats : dict (optional)
        if not None then the images saved will be color augmented by color normalizing the tiles using the Reinhard
        color norm method. This dict should contain src_mu and src_sigma keys with the stats for this image and
        target_mu and target_sigma keys which are lists containing stats for 1 or more target images to normalize to.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    if reinhard_stats is not None:
        # get color stats for image
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions (gt codes)
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(columns=[
        'group', 'overlay_order', 'GT_code', 'is_roi', 'is_background_class',
        'color', 'comments'
    ],
                            data=gt_data,
                            index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=mask_mag)
    # - scaling the annotations to lower magnification
    mask_annotations = scale_slide_annotations(deepcopy(annotations),
                                               sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function returns also the RGB image which we use for
    # - getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(
        GET_KWARGS)  # copy to avoid mutating the module-level default
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi',
        slide_annotations=mask_annotations,
        element_infos=mask_element_info,
        **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(gc=gc,
                                                        slide_id=item_id,
                                                        MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    fr_tile_size = int(tile_size[0] / fr_to_lr_factor), int(
        tile_size[1] / fr_to_lr_factor)  # (width, height)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    for x in range(0, fr_width, fr_tile_size[0]):
        for y in range(0, fr_height, fr_tile_size[1]):
            # check that the tile won't go over the edge of image, if so skip
            if x + fr_tile_size[0] > fr_width or y + fr_tile_size[
                    1] > fr_height:
                continue

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[int(y * mask_mag /
                                          fr_mag):int((y + fr_tile_size[1]) *
                                                      mask_mag / fr_mag),
                                      int(x * mask_mag /
                                          fr_mag):int((x + fr_tile_size[0]) *
                                                      mask_mag / fr_mag)]

            # skip if tile does not contain enough tissue
            if np.count_nonzero(
                    tissue_tile) / tissue_tile.size < tissue_threshold:
                continue

            # check tile for annotations, using the binary mask for annotations
            annotation_tile = ann_mask_and_image['ROI'][
                int(y * mask_mag / fr_mag):int((y + fr_tile_size[1]) *
                                               mask_mag / fr_mag),
                int(x * mask_mag / fr_mag):int((x + fr_tile_size[0]) *
                                               mask_mag / fr_mag)]

            # tile is background if no annotation is present (of any group)
            background_flag = True
            # - check for each annotation group
            for i, group_name in enumerate(group_names):
                group_annotation_tile = annotation_tile == i + 1

                # tile is ignored if not enough contain annotation
                if np.count_nonzero(
                        group_annotation_tile
                ) / group_annotation_tile.size < annotation_threshold:
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                # get annotation image and save it
                get_kwargs['bounds'] = {
                    'XMIN': x,
                    'XMAX': x + fr_tile_size[0],
                    'YMIN': y,
                    'YMAX': y + fr_tile_size[1]
                }

                annotation_im = get_image_and_mask_from_slide(
                    mode='manual_bounds',
                    slide_annotations=annotations,
                    element_infos=lr_element_info,
                    **get_kwargs)['rgb']

                # save the image to correct directory
                imwrite(
                    join(group_dirs[i],
                         '{}_x_{}_y_{}.png'.format(filename, x, y)),
                    annotation_im)

                if reinhard_stats is not None:
                    # add color augmentation with Reinhard method
                    for j, (_, v) in enumerate(reinhard_stats.items()):
                        im_norm = reinhard(annotation_im.copy(),
                                           v['mu'],
                                           v['sigma'],
                                           src_mu=mu,
                                           src_sigma=sigma)
                        imwrite(
                            join(
                                group_dirs[i],
                                '{}_x_{}_y_{}_norm_{}.png'.format(
                                    filename, x, y, j)), im_norm)

            if background_flag:
                # save coordinates for background (non-glomeruli) image candidates
                background_regions.append({
                    'magnification': save_mag,
                    'left': x,
                    'top': y,
                    'width': fr_tile_size[0],
                    'height': fr_tile_size[1]
                })

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most represented annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        background_regions = background_regions[:int(
            oversample_background * max(group_annotation_counts))]

    for region in background_regions:
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image
        imwrite(
            join(
                background_dir,
                '{}_x_{}_y_{}.png'.format(filename, region['left'],
                                          region['top'])), tile_im)

        if reinhard_stats is not None:
            # add color augmentation with Reinhard method
            for j, (_, v) in enumerate(reinhard_stats.items()):
                im_norm = reinhard(tile_im.copy(),
                                   v['mu'],
                                   v['sigma'],
                                   src_mu=mu,
                                   src_sigma=sigma)
                imwrite(
                    join(
                        background_dir, '{}_x_{}_y_{}_norm_{}.png'.format(
                            filename, region['left'], region['top'], j)),
                    im_norm)
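
A hedged usage sketch for grid_tiling(). The client setup and item id are placeholders, the group names are illustrative, and the reinhard_stats values are made-up LAB statistics structured the way the loop in the code consumes them (one 'mu'/'sigma' pair per target image):

# hypothetical usage -- replace APIURL, API_KEY, and ITEM_ID with real values
import girder_client

gc = girder_client.GirderClient(apiUrl=APIURL)
gc.authenticate(apiKey=API_KEY)

reinhard_stats = {
    'target_1': {'mu': [8.6, -0.05, 0.02], 'sigma': [0.6, 0.10, 0.02]},  # illustrative values
}

grid_tiling(
    gc=gc, item_id=ITEM_ID,
    group_names=['mostly_tumor', 'mostly_stroma'],
    save_dir='/tmp/tiles',
    save_mag=10.0, tile_size=(224, 224),
    tissue_threshold=0.3, annotation_threshold=0.15,
    reinhard_stats=reinhard_stats)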