def test_get_roi_mask(self):
    """Check mask shape and bounding-box info for the first ROI.

    Annotations are fetched for SAMPLE_SLIDE_ID through the girder client,
    the ground truth codes are read from GTCODE_PATH, and get_roi_mask() is
    run on the first ROI index found.
    """
    # fetch annotations and derive per-element bounding boxes
    annotations = gc.get('/annotation/item/' + SAMPLE_SLIDE_ID)
    bbox_table = get_bboxes_from_slide_annotations(annotations)

    # ground truth codes, indexed by annotation group name
    gt_table = read_csv(GTCODE_PATH)
    gt_table.index = gt_table.loc[:, 'group']

    # indices of every ROI annotation; only the first one is exercised
    roi_indices = _get_idxs_for_all_rois(
        GTCodes=gt_table, element_infos=bbox_table)
    first_roi = roi_indices[0]

    mask, info = get_roi_mask(
        slide_annotations=annotations,
        element_infos=bbox_table,
        GTCodes_df=gt_table.copy(),
        idx_for_roi=first_roi,
        iou_thresh=0.0,
        roiinfo=None,
        crop_to_roi=True,
        use_shapely=True,
        verbose=False,
        monitorPrefix="roi 1")

    self.assertTupleEqual(mask.shape, (4594, 4542))

    info_keys = (
        'BBOX_HEIGHT', 'BBOX_WIDTH', 'XMIN', 'XMAX', 'YMIN', 'YMAX')
    observed = tuple(info[key] for key in info_keys)
    self.assertTupleEqual(
        observed, (4820, 7006, 59206, 66212, 33505, 38325))
def test_get_roi_mask(self):
    """Check mask shape and bounding-box info for the first ROI.

    All inputs come from the shared ``cfg`` object; copies are handed to the
    functions under test so the shared fixtures are not modified.
    """
    # indices of every ROI annotation; only the first one is exercised
    roi_indices = _get_idxs_for_all_rois(
        GTCodes=cfg.GTcodes, element_infos=cfg.element_infos.copy())
    first_roi = roi_indices[0]

    mask, info = get_roi_mask(
        slide_annotations=copy.deepcopy(cfg.slide_annotations),
        element_infos=cfg.element_infos.copy(),
        GTCodes_df=cfg.GTcodes.copy(),
        idx_for_roi=first_roi,
        roiinfo=None,
        **cfg.get_roi_mask_kwargs)

    assert mask.shape == (228, 226)

    info_keys = (
        'BBOX_HEIGHT', 'BBOX_WIDTH', 'XMIN', 'XMAX', 'YMIN', 'YMAX')
    observed = tuple(info[key] for key in info_keys)
    assert observed == (242, 351, 2966, 3317, 1678, 1920)
def _roi_getter_asis(gc, slide_id, GTCodes_dict, slide_annotations,
                     element_infos, get_kwargs, monitor="", verbose=False):
    """Yield each special ROI region as-is, even if it is very large.

    One item is yielded per ROI index found by _get_idxs_for_all_rois():
    the output of get_image_and_mask_from_slide() in 'polygonal_bounds'
    mode, or None when that call raised (the error is reported through
    warn() and iteration continues).
    """
    # locate every 'special' roi annotation
    gtcodes_frame = DataFrame.from_dict(GTCodes_dict, orient='index')
    roi_indices = _get_idxs_for_all_rois(
        GTCodes=gtcodes_frame, element_infos=element_infos)
    roi_total = len(roi_indices)

    for position, roi_index in enumerate(roi_indices, start=1):
        progress = "%s: roi %d of %d" % (monitor, position, roi_total)
        if verbose:
            print(progress)

        try:
            fetched = get_image_and_mask_from_slide(
                gc=gc,
                slide_id=slide_id,
                GTCodes_dict=GTCodes_dict,
                mode='polygonal_bounds',
                idx_for_roi=roi_index,
                slide_annotations=slide_annotations,
                element_infos=element_infos,
                **get_kwargs)
        except Exception as err:
            # best effort: report the failure and hand back a placeholder
            warn('\n ' + repr(err) + '\n')
            fetched = None

        yield fetched
def get_all_rois_from_slide(
        gc, slide_id, GTCodes_dict, save_directories,
        get_image_and_mask_from_slide_kwargs=None, slide_name=None,
        verbose=True, monitorPrefix=""):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This mainly uses
    get_image_and_mask_from_slide(), which should be referred to for
    implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are allowed
        - ROI: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    get_image_and_mask_from_slide_kwargs : dict or None
        kwargs to pass to get_image_and_mask_from_slide().
        Default values are assigned for top-level keys that are not given.
        The dict passed in is NOT modified.

    slide_name : str or None
        If not given, it is inferred with a server request using the girder
        client (name of the first file of the item, extension removed).

    verbose : bool
        Print progress to screen?

    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    list of dicts
        one entry per ROI, holding the save path for each output that
        get_image_and_mask_from_slide() returned:
        - ROI: path to saved mask (labeled image)
        - rgb: path to saved rgb image
        - contours: path to saved annotation contours
        - visualization: path to saved rgb visualization overlay

    Raises
    ------
    Exception
        if any GT_code in GTCodes_dict is <= 0.

    """
    default_keyvalues = {
        'MPP': 5.0, 'MAG': None,
        'get_roi_mask_kwargs': {
            'iou_thresh': 0.0, 'crop_to_roi': True, 'use_shapely': True,
            'verbose': False
        },
        'get_contours_kwargs': {
            'groups_to_get': None, 'roi_group': 'roi',
            'get_roi_contour': True,
            'discard_nonenclosed_background': True,
            'background_group': 'mostly_stroma',
            'MIN_SIZE': 10, 'MAX_SIZE': None,
            'verbose': False, 'monitorPrefix': ""
        },
        'get_rgb': True,
        'get_contours': True,
        'get_visualization': True,
    }

    # Merge into a fresh dict: user-supplied keys win, and the caller's
    # dict is never written to (it may be reused across slides).
    kvp = dict(default_keyvalues)
    kvp.update(get_image_and_mask_from_slide_kwargs or {})

    # convert to df and sanity check
    GTCodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(GTCodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        # splitext leaves extension-less names intact, whereas slicing at
        # rfind('.') == -1 would silently drop the last character
        slide_name = os.path.splitext(resp[0]['name'])[0]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get specified area
        roi_out = get_image_and_mask_from_slide(
            gc=gc, slide_id=slide_id, GTCodes_dict=GTCodes_dict,
            mode='polygonal_bounds', idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos, **kvp)

        # now save roi (mask, rgb, contours, vis)
        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name,
            roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        # image-like outputs are only saved when present in roi_out
        for imtype in ['ROI', 'rgb', 'visualization']:
            if imtype in roi_out:
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s\n" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        if 'contours' in roi_out:
            savename = os.path.join(
                save_directories['contours'], ROINAMESTR + ".csv")
            if verbose:
                print("%s: Saving %s\n" % (roicountStr, savename))
            contours_df = DataFrame(roi_out['contours'])
            contours_df.to_csv(savename)
            this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
def get_roi_mask(
        slide_annotations, element_infos, GTCodes_df,
        idx_for_roi, iou_thresh=0.0, roiinfo=None,
        crop_to_roi=True, use_shapely=True,
        verbose=False, monitorPrefix=""):
    """Parse annotations and get a ground truth mask for a single ROI.

    This will look at all slide annotations and get ones that
    overlap with the region of interest (ROI) and assigns them to mask.

    Parameters
    -----------
    slide_annotations : list of dicts
        response from server request
    element_infos : pandas DataFrame.
        The columns annidx and elementidx
        encode the dict index of annotation document and element,
        respectively, in the original slide_annotations list of dictionaries.
        This can be obtained by the get_bboxes_from_slide_annotations()
        method
    GTCodes_df : pandas Dataframe
        the ground truth codes and information dataframe.
        WARNING: Modified inside this method so pass a copy.
        This is a dataframe that is indexed by the annotation group name and
        has the following columns:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
    iou_thresh : float
        how much bounding box overlap is enough to
        consider an annotation to belong to the region of interest
    roiinfo : pandas series or dict
        contains information about the roi. Keys will be added to this
        index containing info about the roi like bounding box
        location and size.
    crop_to_roi : bool
        flag of whether to crop polygons to roi
        (prevent overflow beyond roi edge)
    use_shapely : bool
        flag of whether to precisely determine whether an element
        belongs to an ROI using shapely polygons. Slightly slower. If
        set to False, overlapping bounding box is used as a cheap but
        less precise indicator of inclusion.
    verbose : bool
        Print progress to screen?
    monitorPrefix : str
        text to prepend to printed statements

    Returns
    --------
    Np array
        2-D mask (rows x columns) where pixel values encode class
        membership.
        IMPORTANT NOTE: Zero pixels have special meaning and do NOT
        encode specific ground truth class. Instead, they simply
        mean Outside ROI and should be IGNORED during model training
        or evaluation.
    Dict
        information about ROI (XMIN, YMIN, XMAX, YMAX, BBOX_WIDTH,
        BBOX_HEIGHT are set here)

    Raises
    ------
    Exception
        if the ROI element is of type 'point'.

    """
    # This stores information about the ROI like bounds, slide_name, etc
    # Allows passing many parameters and good forward/backward compatibility
    if roiinfo is None:
        roiinfo = dict()

    # isolate annotations that potentially overlap (belong to) mask (incl. ROI)
    overlaps = get_idxs_for_annots_overlapping_roi_by_bbox(
        element_infos, idx_for_roi=idx_for_roi, iou_thresh=iou_thresh)
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes_df, element_infos=element_infos)
    # other ROI annotations are never painted into this ROI's mask
    overlaps = list(set(overlaps) - set(idxs_for_all_rois))
    elinfos_roi = element_infos.loc[[idx_for_roi, ] + overlaps, :]

    # Add roiinfo
    roiinfo['XMIN'] = int(np.min(elinfos_roi.xmin))
    roiinfo['YMIN'] = int(np.min(elinfos_roi.ymin))
    roiinfo['XMAX'] = int(np.max(elinfos_roi.xmax))
    roiinfo['YMAX'] = int(np.max(elinfos_roi.ymax))
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    # only parse if roi is polygonal or rectangular; checked before any
    # polygon is built so the caller gets this message rather than a
    # failure from the geometry code
    if elinfos_roi.loc[idx_for_roi, 'type'] == 'point':
        raise Exception("roi cannot be a point!")

    # get roi polygon. Always bound, so the call below does not hit a
    # NameError when use_shapely is False.
    roi_polygon = None
    if use_shapely:
        coords, _ = _get_element_mask(
            elinfo=elinfos_roi.loc[idx_for_roi],
            slide_annotations=slide_annotations)
        roi_polygon = Polygon(coords)

    # Init mask
    ROI = np.zeros(
        (roiinfo['BBOX_HEIGHT'], roiinfo['BBOX_WIDTH']), dtype=np.uint8)

    # make sure ROI is overlayed first & assigned background class if relevant
    roi_group = elinfos_roi.loc[idx_for_roi, 'group']
    GTCodes_df.loc[roi_group, 'overlay_order'] = np.min(
        GTCodes_df.loc[:, 'overlay_order']) - 1
    bck_classes = GTCodes_df.loc[
        GTCodes_df.loc[:, 'is_background_class'] == 1, :]
    if bck_classes.shape[0] > 0:
        GTCodes_df.loc[
            roi_group, 'GT_code'] = bck_classes.iloc[0, :]['GT_code']

    # Add annotations in overlay order
    overlay_orders = sorted(set(GTCodes_df.loc[:, 'overlay_order']))
    N_elements = elinfos_roi.shape[0]
    elNo = 0
    for overlay_level in overlay_orders:

        # get indices of relevant groups
        relevant_groups = list(GTCodes_df.loc[
            GTCodes_df.loc[:, 'overlay_order'] == overlay_level, 'group'])
        relIdxs = []
        for group_name in relevant_groups:
            relIdxs.extend(list(elinfos_roi.loc[
                elinfos_roi.group == group_name, :].index))

        # get relevant infos and sort from largest to smallest (by bbox area)
        # so that the smaller elements are layered last. This helps partially
        # address issues described in:
        # https://github.com/DigitalSlideArchive/HistomicsTK/issues/675
        elinfos_relevant = elinfos_roi.loc[relIdxs, :].copy()
        elinfos_relevant.sort_values(
            'bbox_area', axis=0, ascending=False, inplace=True)

        # Go through elements and add to ROI mask
        for _, elinfo in elinfos_relevant.iterrows():

            elNo += 1
            elcountStr = "%s: Overlay level %d: Element %d of %d: %s" % (
                monitorPrefix, overlay_level, elNo, N_elements,
                elinfo['group'])
            if verbose:
                print(elcountStr)

            # now add element to ROI
            ROI = _get_and_add_element_to_roi(
                elinfo=elinfo, slide_annotations=slide_annotations, ROI=ROI,
                roiinfo=roiinfo, roi_polygon=roi_polygon,
                GT_code=GTCodes_df.loc[elinfo['group'], 'GT_code'],
                use_shapely=use_shapely, verbose=verbose,
                monitorPrefix=elcountStr)

        # save a copy of ROI-only mask to crop to it later if needed
        if crop_to_roi and (
                overlay_level == GTCodes_df.loc[roi_group, 'overlay_order']):
            roi_only_mask = ROI.copy()

    # Crop polygons to roi if needed (prevent 'overflow' beyond roi edge)
    if crop_to_roi:
        ROI[roi_only_mask == 0] = 0

    # tighten boundary --remember, so far we've used element bboxes to
    # make an over-estimated margin around ROI boundary.
    # NOTE(review): the slice end is exclusive, so the last non-zero
    # row/column is dropped, and XMAX/YMAX below are shifted by the same
    # offsets as XMIN/YMIN (BBOX_WIDTH/HEIGHT therefore do not shrink).
    # Kept unchanged to preserve existing outputs -- confirm intent.
    nz = np.nonzero(ROI)
    ymin, xmin = [np.min(arr) for arr in nz]
    ymax, xmax = [np.max(arr) for arr in nz]
    ROI = ROI[ymin:ymax, xmin:xmax]

    # update roi offset
    roiinfo['XMIN'] += xmin
    roiinfo['YMIN'] += ymin
    roiinfo['XMAX'] += xmin
    roiinfo['YMAX'] += ymin
    roiinfo['BBOX_WIDTH'] = roiinfo['XMAX'] - roiinfo['XMIN']
    roiinfo['BBOX_HEIGHT'] = roiinfo['YMAX'] - roiinfo['YMIN']

    return ROI, roiinfo
def get_all_rois_from_slide_v2(
        gc, slide_id, GTCodes_dict, save_directories,
        annotations_to_contours_kwargs=None,
        mode='object', get_mask=True,
        slide_name=None, verbose=True, monitorprefix="",
        callback=None, callback_kwargs=None):
    """Get all ROIs for a slide without an intermediate mask form.

    This mainly relies on contours_to_labeled_object_mask(), which should
    be referred to for extra documentation.

    This can be run in either the "object" mode, whereby the saved masks
    are a three-channel png where first channel encodes class label (i.e.
    same as semantic segmentation) and the product of the values in the
    second and third channel encodes the object ID. Otherwise, the user
    may decide to run in the "semantic" mode and the resultant mask would
    consist of only one channel (semantic segmentation with no object
    differentiation).

    The difference between this and version 1, found at
    histomicstk.annotations_and_masks.annotations_to_masks_handler.
    get_all_rois_from_slide()
    is that this (version 2) gets the contours first, including cropping
    to wanted ROI boundaries and other processing using shapely, and THEN
    parses these into masks. This enables us to differentiate various
    objects to use the data for object localization or classification or
    segmentation tasks. If you would like to get semantic segmentation
    masks, i.e. you do not really care about individual objects, you can
    use either version 1 or this method. They re-use much of the same
    code-base, but some edge cases may be better handled by version 1. For
    example, since this version uses shapely first to crop, some objects
    may be incorrectly parsed by shapely. Version 1, using PIL.ImageDraw
    may not have these problems.

    Bottom line is: if you need semantic segmentation masks, it is probably
    safer to use version 1, whereas if you need object segmentation masks,
    this method should be used.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)

    slide_id : str
        girder id for item (slide)

    GTCodes_dict : dict
        the ground truth codes and information dict.
        This is a dict that is indexed by the annotation group name and
        each entry is in turn a dict with the following keys:
        - group: group name of annotation (string), eg. mostly_tumor
        - overlay_order: int, how early to place the annotation in the
        mask. Larger values means this annotation group is overlayed
        last and overwrites whatever overlaps it.
        - GT_code: int, desired ground truth code (in the mask)
        Pixels of this value belong to corresponding group (class)
        - is_roi: Flag for whether this group encodes an ROI
        - is_background_class: Flag, whether this group is the default
        fill value inside the ROI. For example, you may decide that
        any pixel inside the ROI is considered stroma.

    save_directories : dict
        paths to directories to save data. Each entry is a string, and the
        following keys are used
        - mask: path to save masks (labeled images)
        - rgb: path to save rgb images
        - contours: path to save annotation contours
        - visualization: path to save rgb visualization overlays

    mode : str
        run mode for getting masks. Must be in
        - object: get 3-channel mask where first channel encodes label
        (tumor, stroma, etc) while product of second and third
        channel encodes the object ID (i.e. individual contours)
        This is useful for object localization and segmentation tasks.
        - semantic: get a 1-channel mask corresponding to the first channel
        of the object mode.

    get_mask : bool
        While the main purpose of this method IS to get object segmentation
        masks, it is conceivable that some users might just want to get
        the RGB and contours. Default is True.

    annotations_to_contours_kwargs : dict or None
        kwargs to pass to annotations_to_contours_no_mask().
        Default values are assigned if specific parameters are not given.
        The dict passed in is NOT modified.

    slide_name : str or None
        If not given, it is inferred with a server request using the girder
        client (name of the first file of the item, extension removed).

    verbose : bool
        Print progress to screen?

    monitorprefix : str
        text to prepend to printed statements

    callback : function
        a callback function to run on the roi dictionary output. This is
        internal, but if you really want to use this, make sure the callback
        can accept the following keys and that you do NOT assign them
        yourself
        gc, slide_id, slide_name, MPP, MAG, verbose, monitorprefix
        The callback should return the roi dictionary, whether or not it
        is modified inside it; the returned dictionary is the one that is
        saved to disk. A callback that returns None is assumed to have
        modified the dictionary in place.

    callback_kwargs : dict or None
        kwargs to pass to callback, not including the mandatory kwargs
        that will be passed internally (mentioned earlier here).
        The dict passed in is NOT modified.

    Returns
    --------
    list of dicts
        each entry contains the following keys
        mask - path to saved mask
        rgb - path to saved rgb image
        contours - path to saved annotation contours
        visualization - path to saved rgb visualization overlay

    Raises
    ------
    Exception
        if any GT_code in GTCodes_dict is <= 0.

    """
    default_keyvalues = {
        'MPP': None, 'MAG': None,
        'linewidth': 0.2,
        'get_rgb': True, 'get_visualization': True,
    }

    # Merge into a fresh dict: user-supplied keys win, and the caller's
    # dict is never written to.
    kvp = dict(default_keyvalues)
    kvp.update(annotations_to_contours_kwargs or {})

    # Private copy of the callback kwargs; also covers the None default,
    # which would otherwise fail on .update() below.
    cb_kwargs = dict(callback_kwargs or {})

    # convert to df and sanity check
    gtcodes_df = DataFrame.from_dict(GTCodes_dict, orient='index')
    if any(gtcodes_df.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # if not given, assign name of first file associated with girder item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        # splitext leaves extension-less names intact, whereas slicing at
        # rfind('.') == -1 would silently drop the last character
        slide_name = os.path.splitext(resp[0]['name'])[0]

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # scale up/down annotations by a factor
    sf, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=kvp['MPP'], MAG=kvp['MAG'])
    slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get idx of all 'special' roi annotations
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=gtcodes_df, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorprefix, roino + 1, len(idxs_for_all_rois))

        # get specified area
        roi_out = annotations_to_contours_no_mask(
            gc=gc, slide_id=slide_id,
            mode='polygonal_bounds', idx_for_roi=idx_for_roi,
            slide_annotations=slide_annotations,
            element_infos=element_infos, **kvp)

        # get corresponding mask (semantic or object)
        if get_mask:
            roi_out['mask'] = contours_to_labeled_object_mask(
                contours=DataFrame(roi_out['contours']),
                gtcodes=gtcodes_df,
                mode=mode, verbose=verbose, monitorprefix=roicountStr)

        # now run callback on roi_out
        if callback is not None:
            # these are 'compulsory' kwargs for the callback
            # since it will not have access to these otherwise
            cb_kwargs.update({
                'gc': gc,
                'slide_id': slide_id,
                'slide_name': slide_name,
                'MPP': kvp['MPP'],
                'MAG': kvp['MAG'],
                'verbose': verbose,
                'monitorprefix': roicountStr,
            })
            returned = callback(roi_out, **cb_kwargs)
            # honour the documented contract (callback returns the roi
            # dict) while still supporting in-place callbacks
            if returned is not None:
                roi_out = returned

        # now save roi (rgb, vis, mask)
        this_roi_savenames = dict()
        ROINAMESTR = "%s_left-%d_top-%d_bottom-%d_right-%d" % (
            slide_name,
            roi_out['bounds']['XMIN'], roi_out['bounds']['YMIN'],
            roi_out['bounds']['YMAX'], roi_out['bounds']['XMAX'])

        for imtype in ['mask', 'rgb', 'visualization']:
            if imtype in roi_out:
                savename = os.path.join(
                    save_directories[imtype], ROINAMESTR + ".png")
                if verbose:
                    print("%s: Saving %s" % (roicountStr, savename))
                imwrite(im=roi_out[imtype], uri=savename)
                this_roi_savenames[imtype] = savename

        # save contours
        savename = os.path.join(
            save_directories['contours'], ROINAMESTR + ".csv")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        contours_df = DataFrame(roi_out['contours'])
        contours_df.to_csv(savename)
        this_roi_savenames['contours'] = savename

        savenames.append(this_roi_savenames)

    return savenames
def get_all_roi_masks_for_slide(
        input_img, input_ann, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details. For every ROI
    the mask is written under <MASK_SAVEPATH>/mask and the matching image
    region read from input_img is written under <MASK_SAVEPATH>/region,
    both using the same file name.

    Parameters
    -----------
    input_img : object
        input large image object (its getRegion() method is used)
    input_ann : object
        input annotation object, used as the slide annotations
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks and regions
    slide_name (optional) : str
        prefix for the saved file names. It is used as given; no server
        lookup is done in this variant.
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict or None
        extra kwargs for get_roi_mask()

    Returns
    --------
    list of strs
        save paths for the ROI masks (region paths are not included)

    Raises
    ------
    Exception
        if any GT_code in the csv file is <= 0.

    """
    get_roi_mask_kwargs = get_roi_mask_kwargs or {}

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    if any(GTCodes.loc[:, 'GT_code'] <= 0):
        raise Exception("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = input_ann

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    # output locations and region size cap do not depend on the ROI
    mask_dir = os.path.join(MASK_SAVEPATH, 'mask')
    region_dir = os.path.join(MASK_SAVEPATH, 'region')
    maxRegionSize = 5000

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations, element_infos=element_infos,
            GTCodes_df=GTCodes.copy(), idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # Create folders lazily (only once there is something to save).
        # exist_ok tolerates reruns, while real failures such as missing
        # permissions surface here instead of being swallowed.
        for folder in (mask_dir, region_dir):
            os.makedirs(folder, exist_ok=True)

        # mask and region share the same base file name
        ROINAMESTR = "%s_left-%d_top-%d" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])

        # now save roi
        savename = os.path.join(mask_dir, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        # read and save the image region covered by the ROI bounding box
        region = [
            roiinfo['XMIN'], roiinfo['YMIN'],
            roiinfo['BBOX_WIDTH'], roiinfo['BBOX_HEIGHT']]
        im_input = input_img.getRegion(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **utils.get_region_dict(region, maxRegionSize, input_img))[0]
        savename1 = os.path.join(region_dir, ROINAMESTR + ".png")
        skimage.io.imsave(savename1, im_input)
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename1))

        savenames.append(savename)

    return savenames
def get_all_roi_masks_for_slide(
        gc, slide_id, GTCODE_PATH, MASK_SAVEPATH, slide_name=None,
        verbose=True, monitorPrefix="", get_roi_mask_kwargs=None):
    """Parse annotations and save ground truth masks for ALL ROIs.

    Get all ROIs in a single slide. This is a wrapper around get_roi_mask()
    which should be referred to for implementation details.

    Parameters
    -----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl = APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    GTCODE_PATH : str
        path to the ground truth codes and information
        csv file. Refer to the docstring of get_roi_mask() for more info.
    MASK_SAVEPATH : str
        path to directory to save ROI masks
    slide_name (optional) : str
        If not given, it is inferred with a server request using the girder
        client (name of the first file of the item, extension removed).
    verbose (optional) : bool
        Print progress to screen?
    monitorPrefix (optional) : str
        text to prepend to printed statements
    get_roi_mask_kwargs : dict or None
        extra kwargs for get_roi_mask()

    Returns
    --------
    list of strs
        save paths for ROIs

    Raises
    ------
    AssertionError
        if any GT_code in the csv file is not > 0.

    """
    get_roi_mask_kwargs = get_roi_mask_kwargs or {}

    # if not given, assign name of first file associated with item
    if slide_name is None:
        resp = gc.get('/item/%s/files' % slide_id)
        # splitext leaves extension-less names intact, whereas slicing at
        # rfind('.') == -1 would silently drop the last character
        slide_name = os.path.splitext(resp[0]['name'])[0]

    # read ground truth codes and information
    GTCodes = read_csv(GTCODE_PATH)
    GTCodes.index = GTCodes.loc[:, 'group']
    # Raised explicitly so the check is not stripped under ``python -O``;
    # the exception type is kept for callers that catch AssertionError.
    if not all(GTCodes.loc[:, 'GT_code'] > 0):
        raise AssertionError("All GT_code must be > 0")

    # get annotations for slide
    slide_annotations = gc.get('/annotation/item/' + slide_id)

    # get bounding box information for all annotations
    element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # get indices of rois
    idxs_for_all_rois = _get_idxs_for_all_rois(
        GTCodes=GTCodes, element_infos=element_infos)

    savenames = []

    for roino, idx_for_roi in enumerate(idxs_for_all_rois):

        roicountStr = "%s: roi %d of %d" % (
            monitorPrefix, roino + 1, len(idxs_for_all_rois))

        # get roi mask and info; a copy is passed because get_roi_mask()
        # is handed a fresh GTCodes frame for every ROI
        ROI, roiinfo = get_roi_mask(
            slide_annotations=slide_annotations, element_infos=element_infos,
            GTCodes_df=GTCodes.copy(), idx_for_roi=idx_for_roi,
            monitorPrefix=roicountStr, **get_roi_mask_kwargs)

        # now save roi
        ROINAMESTR = "%s_left-%d_top-%d_mag-BASE" % (
            slide_name, roiinfo['XMIN'], roiinfo['YMIN'])
        savename = os.path.join(MASK_SAVEPATH, ROINAMESTR + ".png")
        if verbose:
            print("%s: Saving %s\n" % (roicountStr, savename))
        imwrite(im=ROI, uri=savename)

        savenames.append(savename)

    return savenames