示例#1
0
def _keep_relevant_elements_for_roi(element_infos,
                                    sf,
                                    mode='manual_bounds',
                                    idx_for_roi=None,
                                    roiinfo=None):
    """Keep the elements whose bounding boxes overlap an ROI.

    The ROI bounds are also extended so that they cover every kept element.

    Parameters
    ----------
    element_infos : pandas DataFrame
        One row per annotation element. Must provide the columns xmin,
        xmax, ymin and ymax.
    sf : float
        Scale factor. The XMIN/XMAX/YMIN/YMAX values in roiinfo are
        multiplied by it before being added to element_infos, and every
        value of the returned roiinfo is divided by it.
    mode : str
        If "polygonal_bounds", the ROI is the existing row idx_for_roi of
        element_infos. For any other value the ROI is the rectangle given
        by roiinfo, which is appended to element_infos as a new last row.
    idx_for_roi : int
        Row label of the ROI within element_infos. Only used when mode is
        "polygonal_bounds"; otherwise it is overwritten with the label of
        the appended row.
    roiinfo : dict
        Information about the ROI. Must contain XMIN, XMAX, YMIN and YMAX
        unless mode is "polygonal_bounds". It is copied, so the caller's
        object is left untouched.

    Returns
    -------
    pandas DataFrame
        The rows of element_infos that overlap the ROI (as reported by
        get_idxs_for_annots_overlapping_roi_by_bbox), plus the ROI row
        itself when mode is "polygonal_bounds".
    dict
        Updated ROI information with XMIN, YMIN, XMAX, YMAX, BBOX_WIDTH
        and BBOX_HEIGHT covering all returned elements, scaled back by
        1 / sf and truncated to int.

    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the row is
    # added with pandas.concat instead.
    import pandas as pd

    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility.
    # A shallow copy is taken so that the scaled coordinates written below
    # never leak into the caller's object.
    roiinfo = dict() if roiinfo is None else dict(roiinfo)

    if mode != "polygonal_bounds":
        # add the requested rectangle to the bounding boxes dataframe
        roi_row = pd.DataFrame([{
            'xmin': int(roiinfo['XMIN'] * sf),
            'xmax': int(roiinfo['XMAX'] * sf),
            'ymin': int(roiinfo['YMIN'] * sf),
            'ymax': int(roiinfo['YMAX'] * sf),
        }])
        element_infos = pd.concat([element_infos, roi_row],
                                  ignore_index=True)
        # ignore_index renumbers the rows, so the ROI is the last one
        idx_for_roi = element_infos.shape[0] - 1

    # isolate annotations that potentially overlap roi
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi)
    if mode == "polygonal_bounds":
        overlaps = overlaps + [idx_for_roi]
    elinfos_roi = element_infos.loc[overlaps, :]

    # update roiinfo -- remember, annotation elements can be
    # really large and extend beyond the bounds asked by the user.
    # since we're not parsing the polygons into mask form here, and
    # therefore we're not 'cropping' the polygons to the requested bounds,
    # we extend the requested bounds themselves to accommodate the
    # overflowing annotations.
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # scale back coords. NOTE: every value in roiinfo is divided by sf and
    # truncated to int, so all values must be numeric.
    roiinfo = {k: int(v / sf) for k, v in roiinfo.items()}

    return elinfos_roi, roiinfo
示例#2
0
def get_roi_mask(slide_annotations,
                 element_infos,
                 GTCodes_df,
                 idx_for_roi,
                 iou_thresh=0.0,
                 roiinfo=None,
                 crop_to_roi=True,
                 use_shapely=True,
                 verbose=False,
                 monitorPrefix=""):
    """Parse annotations and get a ground truth mask for a single ROI.

    This will look at all slide annotations and get ones that
    overlap with the region of interest (ROI) and assigns them to mask.

    Parameters
    -----------
    slide_annotations : list of dicts
        response from server request
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by get_bboxes_from_slide_annotations() method
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        WARNING: Modified inside this method so pass a copy.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
    iou_thresh : float
        how much bounding box overlap is enough to
        consider an annotation to belong to the region of interest
    roiinfo : pandas series or dict
        contains information about the roi. Keys will be added to this
        index containing info about the roi like bounding box
        location and size. Modified in place.
    crop_to_roi : bool
        flag of whether to crop polygons to roi
        (prevent overflow beyond roi edge)
    use_shapely : bool
        flag of whether to precisely determine whether an element
        belongs to an ROI using shapely polygons. Slightly slower. If
        set to False, overlapping bounding box is used as a cheap but
        less precise indicator of inclusion.
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    Np array
        2-D uint8 mask (rows x columns), where pixel values encode class
        membership. Its shape equals
        (roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']).
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside ROI and should be IGNORED during model training
        or evaluation.
    Dict
        information about ROI. XMIN/YMIN are inclusive and XMAX/YMAX are
        exclusive, so BBOX_WIDTH == XMAX - XMIN and
        BBOX_HEIGHT == YMAX - YMIN.

    Raises
    ------
    Exception
        If the ROI element is a point.
    ValueError
        If the resulting mask has no non-zero pixels, so that there is
        nothing to tighten the boundary around.

    """
    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility
    if roiinfo is None:
        roiinfo = dict()

    # isolate annotations that potentially overlap (belong to) mask (incl. ROI)
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi, iou_thresh=iou_thresh)
    idxs_for_all_rois = _get_idxs_for_all_rois(GTCodes=GTCodes_df,
                                               element_infos=element_infos)
    # other ROIs are never painted into this ROI's mask
    overlaps = list(set(overlaps) - set(idxs_for_all_rois))
    elinfos_roi = element_infos.loc[[idx_for_roi] + overlaps, :]

    # only parse if roi is polygonal or rectangular. Checked up front so that
    # a point ROI fails with this message before any polygon or mask work.
    if elinfos_roi.loc[idx_for_roi, 'type'] == 'point':
        raise Exception("roi cannot be a point!")

    # Add roiinfo
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # get roi polygon. It must always be bound because it is passed to
    # _get_and_add_element_to_roi below even when use_shapely is False
    # (presumably ignored by that helper in that case -- TODO confirm).
    roi_polygon = None
    if use_shapely:
        coords, _ = _get_element_mask(elinfo=elinfos_roi.loc[idx_for_roi],
                                      slide_annotations=slide_annotations)
        roi_polygon = Polygon(coords)

    # Init mask
    ROI = np.zeros((roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']),
                   dtype=np.uint8)

    # make sure ROI is overlayed first & assigned background class if relevant
    roi_group = elinfos_roi.loc[idx_for_roi, 'group']
    GTCodes_df.loc[roi_group, 'overlay_order'] = np.min(
        GTCodes_df.loc[:, 'overlay_order']) - 1
    bck_classes = GTCodes_df.loc[GTCodes_df.loc[:,
                                                'is_background_class'] == 1, :]
    if bck_classes.shape[0] > 0:
        GTCodes_df.loc[roi_group,
                       'GT_code'] = bck_classes.iloc[0, :]['GT_code']
    # GTCodes_df is not modified past this point, so look this up only once
    roi_overlay_order = GTCodes_df.loc[roi_group, 'overlay_order']

    # Add annotations in overlay order
    overlay_orders = sorted(set(GTCodes_df.loc[:, 'overlay_order']))
    N_elements = elinfos_roi.shape[0]
    elNo = 0
    for overlay_level in overlay_orders:

        # get indices of relevant groups
        relevant_groups = list(
            GTCodes_df.loc[GTCodes_df.loc[:, 'overlay_order'] == overlay_level,
                           'group'])
        relIdxs = []
        for group_name in relevant_groups:
            relIdxs.extend(
                elinfos_roi.loc[elinfos_roi.group == group_name, :].index)

        # get relevant infos and sort from largest to smallest (by bbox area)
        # so that the smaller elements are layered last. This helps partially
        # address issues described in:
        # https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        elinfos_relevant = elinfos_roi.loc[relIdxs, :].sort_values(
            'bbox_area', axis=0, ascending=False)

        is_roi_level = overlay_level == roi_overlay_order

        # Go through elements and add to ROI mask
        for _, elinfo in elinfos_relevant.iterrows():

            elNo += 1
            elcountStr = "%s: Overlay level %d: Element %d of %d: %s" % (
                monitorPrefix, overlay_level, elNo, N_elements,
                elinfo['group'])
            if verbose:
                print(elcountStr)

            # now add element to ROI
            ROI = _get_and_add_element_to_roi(
                elinfo=elinfo,
                slide_annotations=slide_annotations,
                ROI=ROI,
                roiinfo=roiinfo,
                roi_polygon=roi_polygon,
                GT_code=GTCodes_df.loc[elinfo['group'], 'GT_code'],
                use_shapely=use_shapely,
                verbose=verbose,
                monitorPrefix=elcountStr)

            # save a copy of ROI-only mask to crop to it later if needed
            if crop_to_roi and is_roi_level:
                roi_only_mask = ROI.copy()

    # Crop polygons to roi if needed (prevent 'overflow' beyond roi edge)
    if crop_to_roi:
        ROI[roi_only_mask == 0] = 0

    # tighten boundary --remember, so far we've used element bboxes to
    # make an over-estimated margin around ROI boundary.
    nonzero_rows, nonzero_cols = np.nonzero(ROI)
    if nonzero_rows.size == 0:
        raise ValueError(
            "ROI mask has no non-zero pixels; cannot tighten its boundary.")
    # np.max gives the last labelled index (inclusive); slicing needs an
    # exclusive stop, hence the + 1. Without it the last labelled row and
    # column would be cut off.
    row_start = int(nonzero_rows.min())
    row_stop = int(nonzero_rows.max()) + 1
    col_start = int(nonzero_cols.min())
    col_stop = int(nonzero_cols.max()) + 1
    ROI = ROI[row_start:row_stop, col_start:col_stop]

    # update roi offset. Both ends are measured from the origin of the
    # un-tightened box, so that the bbox size matches the cropped mask shape.
    left, top = roiinfo['XMIN'], roiinfo['YMIN']
    roiinfo['XMIN'] = left + col_start
    roiinfo['YMIN'] = top + row_start
    roiinfo['XMAX'] = left + col_stop
    roiinfo['YMAX'] = top + row_stop
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    return ROI, roiinfo