def test_get_roi_mask(self):
        """Check mask shape and bounding-box info for the first ROI."""
        # fetch the sample slide's annotations and tabulate element bboxes
        annotations = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
        bbox_table = get_bboxes_from_slide_annotations(annotations)

        # ground truth codes, indexed by annotation group name
        gtcodes = read_csv(GTCODE_PATH)
        gtcodes.index = gtcodes.loc[:, 'group']

        # only the first ROI is exercised here
        roi_indices = _get_idxs_for_all_rois(
            GTCodes=gtcodes, element_infos=bbox_table)
        first_roi = roi_indices[0]

        mask_options = {
            'iou_thresh': 0.0,
            'roiinfo': None,
            'crop_to_roi': True,
            'use_shapely': True,
            'verbose': False,
            'monitorPrefix': "roi 1",
        }
        mask, info = get_roi_mask(
            slide_annotations=annotations,
            element_infos=bbox_table,
            GTCodes_df=gtcodes.copy(),  # get_roi_mask edits its GTCodes_df
            idx_for_roi=first_roi,
            **mask_options)

        self.assertTupleEqual(mask.shape, (4594, 4542))

        info_keys = (
            'BBOX_HEIGHT', 'BBOX_WIDTH', 'XMIN', 'XMAX', 'YMIN', 'YMAX')
        self.assertTupleEqual(
            tuple(info[key] for key in info_keys),
            (4820, 7006, 59206, 66212, 33505, 38325))
Exemplo n.º 2
0
    def test_get_roi_mask(self):
        """Check mask shape and bounding-box info for the first ROI."""
        # locate all ROI elements; only the first one is exercised here
        roi_indices = _get_idxs_for_all_rois(
            GTCodes=cfg.GTcodes, element_infos=cfg.element_infos.copy())
        first_roi = roi_indices[0]

        # every shared fixture is copied before being handed over
        mask, info = get_roi_mask(
            slide_annotations=copy.deepcopy(cfg.slide_annotations),
            element_infos=cfg.element_infos.copy(),
            GTCodes_df=cfg.GTcodes.copy(),
            idx_for_roi=first_roi,
            roiinfo=None,
            **cfg.get_roi_mask_kwargs)

        assert mask.shape == (228, 226)

        info_keys = (
            'BBOX_HEIGHT', 'BBOX_WIDTH', 'XMIN', 'XMAX', 'YMIN', 'YMAX')
        observed = tuple(info[key] for key in info_keys)
        assert observed == (242, 351, 2966, 3317, 1678, 1920)
def _roi_getter_asis(gc,
                     slide_id,
                     GTCodes_dict,
                     slide_annotations,
                     element_infos,
                     get_kwargs,
                     monitor="",
                     verbose=False):
    """Download special ROI regions as-is, even if they are very large."""
    # get idx of all 'special' roi annotations
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes_df,
                                               element_infos=element_infos)

    # go through rois and download as-is
    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roistr = "%s: roi %d of %d" % (monitor, roino + 1,
                                       len(idxs_for_all_rois))
        if verbose:
            print(roistr)

        try:
            roi_out = get_image_and_mask_from_slide(
                gc=gc,
                slide_id=slide_id,
                GTCodes_dict=GTCodes_dict,
                mode='polygonal_bounds',
                idx_for_roi=idx_for_roi,
                slide_annotations=slide_annotations,
                element_infos=element_infos,
                **get_kwargs)
        except Exception as e:
            problem = '\n   '
            problem += e.__repr__()
            problem += '\n'
            warn(problem)
            roi_out = None

        yield roi_out
Exemplo n.º 4
0
def get_all_rois_from_slide(
    gc,
    slide_id,
    GTCodes_dict,
    save_directories,
    get_image_and_mask_from_slide_kwargs=None,
    slide_name=None,
    verbose=True,
    monitorPrefix="",
):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to
    for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    get_image_and_mask_from_slide_kwargs : dict
        kwargs to pass to get_image_and_mask_from_slide()
        default values are assigned if specific parameters are not given.
        The dict passed in is not modified.

    slide_name : str or None
        If not given, it's inferred using a server request using girder
        client (name of the first file of the item, extension removed).

    verbose : bool
        Print progress to screen?

    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    list of dicts
        one entry per ROI; each entry holds the save path of every output
        that was present for that ROI, under the following keys
        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    Raises
    ------
    Exception
        if any GT_code in GTCodes_dict is <= 0.

    """
    # assign defaults if nothing given
    default_keyvalues = {
        'MPP': 5.0,
        'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0,
            'crop_to_roi': True,
            'use_shapely': True,
            'verbose': False
        },
        'get_contours_kwargs': {
            'groups_to_get': None,
            'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10,
            'MAX_SIZE': None,
            'verbose': False,
            'monitorPrefix': ""
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }

    # Merge into a fresh dict (user-given keys win over the defaults) so
    # that the caller's kwargs dict is never modified.
    kvp = dict(default_keyvalues)
    kvp.update(get_image_and_mask_from_slide_kwargs or {})

    # convert to df and sanity check
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(GTCodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(gc=gc,
                                           slide_id=slide_id,
                                           MPP=kvp['MPP'],
                                           MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes_df,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorPrefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get specified area
        roi_out = get_image_and_mask_from_slide(
            gc=gc,
            slide_id=slide_id,
            GTCodes_dict=GTCodes_dict,
            mode='polygonal_bounds',
            idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            **kvp)

        # now save roi (mask, rgb, contours, vis)

        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name, roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        # image-like outputs are only saved when present in roi_out
        for imtype in ('ROI', 'rgb', 'visualization'):
            if imtype in roi_out:
                savename = os.path.join(save_directories[imtype],
                                        ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s\n" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        if 'contours' in roi_out:
            savename = os.path.join(save_directories['contours'],
                                    ROINAMESTR + ".csv")
            if verbose:
                print("%s: Saving %s\n" % (roicountStr, savename))
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
Exemplo n.º 5
0
def get_roi_mask(slide_annotations,
                 element_infos,
                 GTCodes_df,
                 idx_for_roi,
                 iou_thresh=0.0,
                 roiinfo=None,
                 crop_to_roi=True,
                 use_shapely=True,
                 verbose=False,
                 monitorPrefix=""):
    """Parse annotations and get a ground truth mask for a single ROI.

    This will look at all slide annotations and get ones that
    overlap with the region of interest (ROI) and assigns them to mask.

    Parameters
    -----------
    slide_annotations : list of dicts
        response from server request
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        WARNING: Modified inside this method so pass a copy.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
    iou_thresh : float
        how much bounding box overlap is enough to
        consider an annotation to belong to the region of interest
    roiinfo : pandas series or dict
        contains information about the roi. Keys will be added to this
        index containing info about the roi like bounding box
        location and size. A new dict is created if None.
    crop_to_roi : bool
        flag of whether to crop polygons to roi
        (prevent overflow beyond roi edge)
    use_shapely : bool
        flag of whether to precisely determine whether an element
        belongs to an ROI using shapely polygons. Slightly slower. If
        set to False, overlapping bounding box is used as a cheap but
        less precise indicator of inclusion.
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    Np array
        2-D uint8 mask, where pixel values encode class membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside ROI and should be IGNORED during model training
        or evaluation.
    Dict
        information about ROI; the keys XMIN, YMIN, XMAX, YMAX,
        BBOX_WIDTH and BBOX_HEIGHT are set by this method.

    Raises
    ------
    Exception
        if the ROI element is of type 'point'.

    """
    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility
    if roiinfo is None:
        roiinfo = dict()

    # isolate annotations that potentially overlap (belong to) mask (incl. ROI)
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi, iou_thresh=iou_thresh)
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes_df,
                                               element_infos=element_infos)
    # other ROI elements are never painted into this ROI's mask
    overlaps = list(set(overlaps) - set(idxs_for_all_rois))
    elinfos_roi = element_infos.loc[[
        idx_for_roi,
    ] + overlaps, :]

    # Add roiinfo
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # get roi polygon
    # Bind the name on both paths: it is always forwarded below, and leaving
    # it unbound made use_shapely=False fail with UnboundLocalError.
    # NOTE(review): presumably _get_and_add_element_to_roi() only reads
    # roi_polygon when use_shapely is True -- confirm against that helper.
    roi_polygon = None
    if use_shapely:
        coords, _ = _get_element_mask(elinfo=elinfos_roi.loc[idx_for_roi],
                                      slide_annotations=slide_annotations)
        roi_polygon = Polygon(coords)

    # Init mask
    ROI = np.zeros((roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']),
                   dtype=np.uint8)

    # only parse if roi is polygonal or rectangular
    if elinfos_roi.loc[idx_for_roi, 'type'] == 'point':
        raise Exception("roi cannot be a point!")

    # make sure ROI is overlayed first & assigned background class if relevant
    roi_group = elinfos_roi.loc[idx_for_roi, 'group']
    GTCodes_df.loc[roi_group, 'overlay_order'] = np.min(
        GTCodes_df.loc[:, 'overlay_order']) - 1
    bck_classes = GTCodes_df.loc[GTCodes_df.loc[:,
                                                'is_background_class'] == 1, :]
    if bck_classes.shape[0] > 0:
        GTCodes_df.loc[roi_group,
                       'GT_code'] = bck_classes.iloc[0, :]['GT_code']

    # Add annotations in overlay order
    overlay_orders = sorted(set(GTCodes_df.loc[:, 'overlay_order']))
    N_elements = elinfos_roi.shape[0]
    elNo = 0
    for overlay_level in overlay_orders:

        # get indices of relevant groups
        relevant_groups = list(
            GTCodes_df.loc[GTCodes_df.loc[:, 'overlay_order'] == overlay_level,
                           'group'])
        relIdxs = []
        for group_name in relevant_groups:
            relIdxs.extend(
                list(
                    elinfos_roi.loc[elinfos_roi.group == group_name, :].index))

        # get relevant infos and sort from largest to smallest (by bbox area)
        # so that the smaller elements are layered last. This helps partially
        # address issues described in:
        # https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        elinfos_relevant = elinfos_roi.loc[relIdxs, :].copy()
        elinfos_relevant.sort_values('bbox_area',
                                     axis=0,
                                     ascending=False,
                                     inplace=True)

        # Go through elements and add to ROI mask
        for elId, elinfo in elinfos_relevant.iterrows():

            elNo += 1
            elcountStr = "%s: Overlay level %d: Element %d of %d: %s" % (
                monitorPrefix, overlay_level, elNo, N_elements,
                elinfo['group'])
            if verbose:
                print(elcountStr)

            # now add element to ROI
            ROI = _get_and_add_element_to_roi(
                elinfo=elinfo,
                slide_annotations=slide_annotations,
                ROI=ROI,
                roiinfo=roiinfo,
                roi_polygon=roi_polygon,
                GT_code=GTCodes_df.loc[elinfo['group'], 'GT_code'],
                use_shapely=use_shapely,
                verbose=verbose,
                monitorPrefix=elcountStr)

            # save a copy of ROI-only mask to crop to it later if needed
            if crop_to_roi and (overlay_level
                                == GTCodes_df.loc[roi_group, 'overlay_order']):
                roi_only_mask = ROI.copy()

    # Crop polygons to roi if needed (prevent 'overflow' beyond roi edge)
    if crop_to_roi:
        ROI[roi_only_mask == 0] = 0

    # tighten boundary --remember, so far we've use element bboxes to
    # make an over-estimated margin around ROI boundary.
    # NOTE(review): the slice end is exclusive, so the last nonzero row and
    # column are dropped. Also, XMAX/YMAX below are shifted by the same
    # offset as XMIN/YMIN, so BBOX_WIDTH/BBOX_HEIGHT keep their
    # pre-tightening values instead of matching the tightened mask shape.
    # Left unchanged because existing callers may rely on these values --
    # confirm before altering.
    nz = np.nonzero(ROI)
    ymin, xmin = [np.min(arr) for arr in nz]
    ymax, xmax = [np.max(arr) for arr in nz]
    ROI = ROI[ymin:ymax, xmin:xmax]

    # update roi offset
    roiinfo['XMIN'] += xmin
    roiinfo['YMIN'] += ymin
    roiinfo['XMAX'] += xmin
    roiinfo['YMAX'] += ymin
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    return ROI, roiinfo
Exemplo n.º 6
0
def get_all_rois_from_slide_v2(gc,
                               slide_id,
                               GTCodes_dict,
                               save_directories,
                               annotations_to_contours_kwargs=None,
                               mode='object',
                               get_mask=True,
                               slide_name=None,
                               verbose=True,
                               monitorprefix="",
                               callback=None,
                               callback_kwargs=None):
    """Get all ROIs for a slide without an intermediate mask form.

    This mainly relies on contours_to_labeled_object_mask(), which should
    be referred to for extra documentation.

    This can be run in either the "object" mode, whereby the saved masks
    are a three-channel png where first channel encodes class label (i.e.
    same as semantic segmentation) and the product of the values in the
    second and third channel encodes the object ID. Otherwise, the user
    may decide to run in the "semantic" mode and the resultant mask would
    consist of only one channel (semantic segmentation with no object
    differentiation).

    The difference between this and version 1, found at
    histomicstk.annotations_and_masks.annotations_to_masks_handler.
    get_all_rois_from_slide()
    is that this (version 2) gets the contours first, including cropping
    to wanted ROI boundaries and other processing using shapely, and THEN
    parses these into masks. This enables us to differentiate various objects
    to use the data for object localization or classification or segmentation
    tasks. If you would like to get semantic segmentation masks, i.e. you do
    not really care about individual objects, you can use either version 1
    or this method. They re-use much of the same code-base, but some edge
    cases may be better handled by version 1. For example, since
    this version uses shapely first to crop, some objects may be incorrectly
    parsed by shapely. Version 1, using PIL.ImageDraw may not have these
    problems.

    Bottom line is: if you need semantic segmentation masks, it is probably
    safer to use version 1, whereas if you need object segmentation masks,
    this method should be used.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlaid
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - mask: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    mode : str
        run mode for getting masks. Must be one of
        - object: get 3-channel mask where first channel encodes label
        (tumor, stroma, etc) while product of second and third
        channel encodes the object ID (i.e. individual contours)
        This is useful for object localization and segmentation tasks.
        - semantic: get a 1-channel mask corresponding to the first channel
        of the object mode.

    get_mask : bool
        While the main purpose of this method IS to get object segmentation
        masks, it is conceivable that some users might just want to get
        the RGB and contours. Default is True.

    annotations_to_contours_kwargs : dict
        kwargs to pass to annotations_to_contours_no_mask()
        default values are assigned if specific parameters are not given.
        The dict passed in is not modified.

    slide_name : str or None
        If not given, it's inferred using a server request using girder
        client (name of the first file of the item, extension removed).

    verbose : bool
        Print progress to screen?

    monitorprefix : str
        text to prepend to printed statements

    callback : function
        a callback function to run on the roi dictionary output. This is
        internal, but if you really want to use this, make sure the callback
        can accept the following keys and that you do NOT assign them yourself
        gc, slide_id, slide_name, MPP, MAG, verbose, monitorprefix
        Also, this callback MUST *ONLY* return the roi dictionary, whether
        or not it is modified inside it. If it is modified inside the callback
        then the modified version is the one that will be saved to disk.
        A callback that returns None is treated as having modified the
        roi dictionary in place.

    callback_kwargs : dict or None
        kwargs to pass to callback, not including the mandatory kwargs
        that will be passed internally (mentioned earlier here).
        The dict passed in is not modified.

    Returns
    --------
    list of dicts
        one entry per ROI; each entry contains the following keys
        (mask, rgb and visualization only when present for that ROI)
        mask - path to saved mask
        rgb - path to saved rgb image
        contours - path to saved annotation contours
        visualization - path to saved rgb visualization overlay

    Raises
    ------
    Exception
        if any GT_code in GTCodes_dict is <= 0.

    """
    default_keyvalues = {
        'MPP': None,
        'MAG': None,
        'linewidth': 0.2,
        'get_rgb': True,
        'get_visualization': True,
    }

    # assign defaults if nothing given. Merge into a fresh dict (user-given
    # keys win) so that the caller's kwargs dict is never modified.
    kvp = dict(default_keyvalues)
    kvp.update(annotations_to_contours_kwargs or {})

    # convert to df and sanity check
    gtcodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(gtcodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(gc=gc,
                                           slide_id=slide_id,
                                           MPP=kvp['MPP'],
                                           MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=gtcodes_df,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorprefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get specified area
        roi_out = annotations_to_contours_no_mask(
            gc=gc,
            slide_id=slide_id,
            mode='polygonal_bounds',
            idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos,
            **kvp)

        # get corresponding mask (semantic or object)
        if get_mask:
            roi_out['mask'] = contours_to_labeled_object_mask(
                contours=DataFrame(roi_out['contours']),
                gtcodes=gtcodes_df,
                mode=mode,
                verbose=verbose,
                monitorprefix=roicountStr)

        # now run callback on roi_out
        if callback is not None:
            # Work on a copy: callback_kwargs may be None, and the caller's
            # dict should not accumulate the internal keys.
            cb_kwargs = dict(callback_kwargs or {})
            # these are 'compulsory' kwargs for the callback
            # since it will not have access to these otherwise
            cb_kwargs.update({
                'gc': gc,
                'slide_id': slide_id,
                'slide_name': slide_name,
                'MPP': kvp['MPP'],
                'MAG': kvp['MAG'],
                'verbose': verbose,
                'monitorprefix': roicountStr,
            })
            returned = callback(roi_out, **cb_kwargs)
            # Honor the documented contract (the returned roi dict is what
            # gets saved) while still accepting in-place callbacks that
            # return nothing.
            if returned is not None:
                roi_out = returned

        # now save roi (rgb, vis, mask)

        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name, roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        # image-like outputs are only saved when present in roi_out
        for imtype in ('mask', 'rgb', 'visualization'):
            if imtype in roi_out:
                savename = os.path.join(save_directories[imtype],
                                        ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        # save contours
        savename = os.path.join(save_directories['contours'],
                                ROINAMESTR + ".csv")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        contours_df = DataFrame(roi_out['contours'])
        contours_df.to_csv(savename)
        this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
Exemplo n.º 7
0
def get_all_roi_masks_for_slide(
        input_img, input_ann, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details. For every ROI,
    the mask is written under MASK_SAVEPATH/mask and the matching image
    region (read with input_img.getRegion()) under MASK_SAVEPATH/region.

    Parameters
    -----------
    input_img : object
        input large image object
    input_ann : object
        input annotation object; passed as the slide annotations to
        get_bboxes_from_slide_annotations() and get_roi_mask()
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks and regions; the 'mask' and
        'region' sub-directories are created if missing
    slide_name (optional) : str
        prefix of the saved file names. No inference is done here: if left
        as None, the text "None" ends up in the file names.
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict or None
        extra kwargs for get_roi_mask(); None means no extra kwargs

    Returns
    --------
    list of strs
        save paths for ROI masks (region image paths are not returned)

    Raises
    ------
    Exception
        if any GT_code in the csv file is <= 0.

    """
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    if any(GTCodes.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = input_ann

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    # output locations (kept as plain '/' concatenation so that the
    # returned paths stay exactly as before)
    MASK_SAVEPATH_MASK = MASK_SAVEPATH + '/mask'
    MASK_SAVEPATH_REG = MASK_SAVEPATH + '/region'
    maxRegionSize = 5000

    # Create the output folders once, and only when something will be
    # saved. Real failures (e.g. permissions) now surface here instead of
    # being swallowed and reappearing later as a confusing write error.
    if len(idxs_for_all_rois) > 0:
        for folder in (MASK_SAVEPATH_MASK, MASK_SAVEPATH_REG):
            os.makedirs(folder, exist_ok=True)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations, element_infos=element_infos,
            GTCodes_df=GTCodes.copy(), idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # mask and region files share the same base name
        ROINAMESTR = "%s_left-%d_top-%d" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])

        # now save roi
        savename = os.path.join(MASK_SAVEPATH_MASK, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        # read the image region matching the ROI bounding box
        region = [roiinfo['XMIN'], roiinfo['YMIN'],
                  roiinfo['BBOX_WIDTH'], roiinfo['BBOX_HEIGHT']]
        im_input = input_img.getRegion(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **utils.get_region_dict(region, maxRegionSize, input_img))[0]

        # save the region image
        savename1 = os.path.join(MASK_SAVEPATH_REG, ROINAMESTR + ".png")
        skimage.io.imsave(savename1, im_input)
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename1))

        savenames.append(savename)

    return savenames
Exemplo n.º 8
0
def get_all_roi_masks_for_slide(gc,
                                slide_id,
                                GTCODE_PATH,
                                MASK_SAVEPATH,
                                slide_name=None,
                                verbose=True,
                                monitorPrefix="",
                                get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name (optional) : str
        If not given, it's inferred using a server request using girder
        client (name of the first file of the item, extension removed).
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict or None
        extra kwargs for get_roi_mask(); None means no extra kwargs

    Returns
    --------
    list of strs
        save paths for ROIs

    Raises
    ------
    AssertionError
        if any GT_code in the csv file is <= 0.

    """
    if get_roi_mask_kwargs is None:
        get_roi_mask_kwargs = {}

    # if not given, assign name of first file associated with item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        slide_name = resp[0]['name']
        slide_name = slide_name[:slide_name.rfind('.')]

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    # Raised explicitly rather than via an `assert` statement so the check
    # is not stripped under `python -O`; the exception type is kept as
    # AssertionError for callers that already catch it.
    if not all(GTCodes.loc[:, 'GT_code'] > 0):
        raise AssertionError("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes,
                                               element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (monitorPrefix, roino + 1,
                                            len(idxs_for_all_rois))

        # get roi mask and info (a copy of GTCodes is passed because
        # get_roi_mask() is given the dataframe to work on per ROI)
        ROI, roiinfo = get_roi_mask(slide_annotations=slide_annotations,
                                    element_infos=element_infos,
                                    GTCodes_df=GTCodes.copy(),
                                    idx_for_roi=idx_for_roi,
                                    monitorPrefix=roicountStr,
                                    **get_roi_mask_kwargs)

        # now save roi
        ROINAMESTR = "%s_left-%d_top-%d_mag-BASE" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename = os.path.join(MASK_SAVEPATH, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        savenames.append(savename)

    return savenames