def _get_roi_from_contours(
        gc, contours: DataFrame, GTCodes_df: DataFrame,
        fovbounds: dict, fmeta: Union[dict, Series]):
    """Get the mask, RGB, and visualization for an ROI from its contours."""
    # anchor mask and rgb
    roi_out = {
        'contours': contours.loc[:, [
            'anchor_id', 'group', 'xmin', 'ymin', 'xmax', 'ymax']],
        'mask': contours_to_labeled_object_mask(
            contours=contours.copy(), gtcodes=GTCodes_df, mode='object'),
        'rgb': _get_rgb_for_interrater(
            gc=gc, bounds=fovbounds, slide_id=fmeta['slide_id']),
    }

    # resize mask to rgb in case there is a difference of a pixel or two
    # due to float rounding errors. order=0 (nearest-neighbor) and
    # anti_aliasing=False preserve the integer label values.
    roi_out['mask'] = np.uint8(resize(
        roi_out['mask'],
        output_shape=np.array(roi_out['rgb']).shape[:2],
        order=0, preserve_range=True, anti_aliasing=False))

    # visualize
    roi_out['vis'] = _visualize_annotations_on_rgb(
        rgb=roi_out['rgb'],
        contours_list=contours.to_dict(orient='records'),
        linewidth=0.2, x_offset=0, y_offset=0, text=True)

    return roi_out
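
# _get_roi_from_contours() resizes the labeled mask with order=0
# (nearest-neighbor) and anti_aliasing=False because interpolation must
# never blend label codes. A minimal standalone illustration of that
# point -- nothing below is part of the module API:

def _demo_label_safe_resize():
    import numpy as np
    from skimage.transform import resize

    labels = np.zeros((10, 10), dtype=np.uint8)
    labels[2:5, 2:5] = 7  # one object, label code 7

    # nearest-neighbor keeps the label set intact ...
    nn = resize(labels, output_shape=(11, 11), order=0,
                preserve_range=True, anti_aliasing=False)
    assert set(np.unique(nn)) <= {0, 7}

    # ... whereas bilinear (order=1) invents fractional "labels"
    # at object edges, corrupting the mask
    bilinear = resize(labels, output_shape=(11, 11), order=1,
                      preserve_range=True, anti_aliasing=False)
    assert np.any((bilinear > 0) & (bilinear < 7))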
def _get_visualization_zoomout(
        gc, slide_id, bounds, MPP, MAG, zoomout=4):
    """Get a zoomed-out visualization of ROI RGB and annotation overlay.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated girder client
    slide_id : str
        girder ID of slide
    bounds : dict
        bounds of the region of interest. Must contain the keys
        XMIN, XMAX, YMIN, YMAX
    MPP : float
        microns per pixel
    MAG : float
        magnification. MPP, if provided, overrides this.
    zoomout : float
        how much to zoom out

    Returns
    -------
    np.array
        Zoomed-out visualization. Output of _visualize_annotations_on_rgb().

    """
    # get append string for server request
    if MPP is not None:
        getsf_kwargs = {
            'MPP': MPP * (zoomout + 1),
            'MAG': None,
        }
    elif MAG is not None:
        getsf_kwargs = {
            'MPP': None,
            'MAG': MAG / (zoomout + 1),
        }
    else:
        getsf_kwargs = {
            'MPP': None,
            'MAG': None,
        }
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, **getsf_kwargs)

    # now get low-magnification surrounding field
    x_margin = (bounds['XMAX'] - bounds['XMIN']) * zoomout / 2
    y_margin = (bounds['YMAX'] - bounds['YMIN']) * zoomout / 2
    getStr = \
        "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" \
        % (slide_id,
           max(0, bounds['XMIN'] - x_margin),
           bounds['XMAX'] + x_margin,
           max(0, bounds['YMIN'] - y_margin),
           bounds['YMAX'] + y_margin)
    getStr += appendStr
    resp = gc.get(getStr, jsonResp=False)
    rgb_zoomout = get_image_from_htk_response(resp)

    # plot a bounding box at the ROI region
    # (assumes the x/y margins were not clamped at the slide edge above)
    xmin = x_margin * sf
    xmax = xmin + (bounds['XMAX'] - bounds['XMIN']) * sf
    ymin = y_margin * sf
    ymax = ymin + (bounds['YMAX'] - bounds['YMIN']) * sf
    xmin, xmax, ymin, ymax = [
        str(int(j)) for j in (xmin, xmax, ymin, ymax)]
    contours_list = [{
        'color': 'rgb(255,255,0)',
        'coords_x': ",".join([xmin, xmax, xmax, xmin, xmin]),
        'coords_y': ",".join([ymin, ymin, ymax, ymax, ymin]),
    }]

    return _visualize_annotations_on_rgb(rgb_zoomout, contours_list)
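
# Why `zoomout + 1` above: padding the ROI by zoomout / 2 ROI-widths on
# each side makes the requested field (zoomout + 1) times the ROI in each
# dimension, and fetching it at MPP * (zoomout + 1), i.e. at scale factor
# 1 / (zoomout + 1), shrinks it back so the zoomed-out view has roughly
# the same pixel size as the original ROI. A standalone check of that
# arithmetic (made-up bounds, no server calls):

def _demo_zoomout_arithmetic():
    zoomout = 4
    bounds = {'XMIN': 1000, 'XMAX': 1400}  # ROI is 400 px wide at base mag

    roi_w = bounds['XMAX'] - bounds['XMIN']
    x_margin = roi_w * zoomout / 2          # 800 px added on each side
    field_w = roi_w + 2 * x_margin          # = (zoomout + 1) * roi_w

    sf = 1. / (zoomout + 1)                 # scale factor of the request
    assert field_w * sf == roi_w            # zoomed-out field ~= ROI size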
def evaluate_maskrcnn_fold_on_inferred_truth(
        fold: int, cfg, model_root: str, model_name: str,
        whoistruth='Ps', evalset='E', getmetrics=True, n_vis=100):
    """Evaluate a trained Mask R-CNN fold against an inferred-truth evalset."""
    model_folder = opj(model_root, f'fold_{fold}')
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')
    savepath = opj(model_folder, f'Eval_{whoistruth}AreTruth_{evalset}')
    maybe_mkdir(savepath)

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Prep data loaders

    slides = read_csv(opj(
        model_folder, f'fold_{fold}_test.csv')).loc[:, 'slide_name'].tolist()
    dataset = NucleusDatasetMask(
        root=EvalSets.dataset_roots[evalset][whoistruth],
        dbpath=EvalSets.dbpaths[evalset][whoistruth],
        slides=slides,
        **cfg.MaskDatasetConfigs.test_dataset)

    # %% --------------------------------------------------------------
    # Evaluate model

    ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
    model = ckpt['model']

    if getmetrics:
        ecfgs = {
            k: v for k, v in cfg.MaskRCNNConfigs.training_params.items()
            if k in [
                'test_maxDets', 'n_testtime_augmentations',
                'crop_inference_to_fov',
            ]
        }
        tsls = evaluateNucleusModel(
            model=model, checkpoint_path=checkpoint_path,
            dloader=DataLoader(
                dataset=dataset, **cfg.MaskDatasetConfigs.test_loader),
            **ecfgs)

        # save results
        for i, tsl in enumerate(tsls):
            with open(opj(savepath, f'testingMetrics_{i}.txt'), 'w') as f:
                f.write(str(tsl)[1:-1].replace(', ', '\n'))

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}
    maybe_mkdir(opj(savepath, 'predictions'))
    # cropper = tvdt.Cropper()
    model.eval()
    model.to('cpu')

    for imno in range(min(n_vis, len(dataset))):

        # pick one image from the dataset
        imgtensor, target = dataset.__getitem__(imno)
        imname = dataset.rfovids[int(target['image_id'])]
        print(f"visualizing image {imno} of {n_vis}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        cpu_device = torch.device('cpu')
        output = [
            {k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # # crop the prediction to FOV

        # Ignore ambiguous nuclei from matching. Note that the
        # model already filters out anything predicted as ignore_label
        # in inference mode, so we only need to do this for gtruth.
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes
        relevant_bboxes = np.concatenate((
            output_boxes[TP],
            output_boxes[FP],
            trg_boxes[FN],
        ), axis=0)
        match_colors = \
            [VisConfigs.MATCHING_COLORS['TP']] * len(TP) \
            + [VisConfigs.MATCHING_COLORS['FP']] * len(FP) \
            + [VisConfigs.MATCHING_COLORS['FN']] * len(FN)

        # just to comply with histomicstk default style
        rgtcodes = {
            k: {
                'group': v,
                'color': 'rgb(' + ','.join(
                    str(c) for c in VisConfigs.CATEG_COLORS[v]) + ')',
            }
            for k, v in dataset.rlabelcodes.items()
        }

        # extract contours +/- condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        _, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes,
            labels=target['labels'].tolist(),
        )

        # extract contours +/- condensed masks (prediction)
        output_labels = np.int32(output['labels']).tolist()
        if not model.transform.densify_mask:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes,
                labels=output_labels,
            )
        else:
            # output mask is already dense
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(data=[
                    {
                        'group': rgtcodes[label]['group'],
                        'GT_code': idx + 1,
                        'color': rgtcodes[label]['color'],
                    }
                    for idx, label in enumerate(output_labels)
                ]),
                MIN_SIZE=1,
                get_roi_contour=False,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        fig, ax = plt.subplots(
            nrows, nperrow, figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        axis = ax[0]
        axis.imshow(rgb)
        axis.set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        axis = ax[1]
        axis = pu.vis_bbox(
            img=rgb, bbox=relevant_bboxes, ax=axis,
            instance_colors=match_colors, linewidth=1.5,
        )
        axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        axis = ax[2]
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        axis.imshow(prediction_vis)
        axis.set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        axis = ax[3]
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        axis.imshow(truth_vis)
        axis.set_title('True masks/bboxes + classif.', fontsize=12)

        # plt.show()
        plt.savefig(opj(savepath, f'predictions/{imno}_{imname}.png'))
        plt.close()
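
# A hedged sketch of how this evaluation entry point might be driven per
# cross-validation fold. The config module, model root, and fold count
# below are placeholders/assumptions, not fixed by this file:

def _demo_evaluate_all_folds():
    from configs import nucleus_model_configs as cfg  # assumed module

    model_root = '/path/to/models'  # placeholder
    for fold in range(1, 6):        # assumes five CV folds
        evaluate_maskrcnn_fold_on_inferred_truth(
            fold=fold, cfg=cfg, model_root=model_root,
            model_name='maskrcnn', whoistruth='Ps', evalset='E',
            getmetrics=True, n_vis=32)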
def run_one_maskrcnn_fold(
        fold: int, cfg, model_root: str, model_name: str,
        qcd_training=True, train=True, vis_test=True, n_vis=100,
        randomvis=True):
    """Train and/or evaluate one cross-validation fold of the Mask R-CNN."""
    # FIXME: for prototyping
    if fold == 999:
        cfg.MaskRCNNConfigs.training_params.update({
            'effective_batch_size': 4,
            'smoothing_window': 1,
            'test_evaluate_freq': 1,
        })

    model_folder = opj(model_root, f'fold_{fold}')
    maybe_mkdir(model_folder)
    checkpoint_path = opj(model_folder, f'{model_name}.ckpt')

    # %% --------------------------------------------------------------
    # Init model

    model = MaskRCNN(**cfg.MaskRCNNConfigs.maskrcnn_params)

    # %% --------------------------------------------------------------
    # Test that it works in forward mode
    # model.eval()
    # x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    # predictions = model(x)

    # %% --------------------------------------------------------------
    # Prep data loaders

    train_slides, test_slides = get_cv_fold_slides(
        train_test_splits_path=CoreSetQC.train_test_splits_path, fold=fold)

    # copy train/test slides with model itself just to be safe
    for tr in ('train', 'test'):
        fname = f'fold_{fold}_{tr}.csv'
        copyfile(
            opj(CoreSetQC.train_test_splits_path, fname),
            opj(model_folder, fname),
        )

    # training data are optionally QC'd
    if qcd_training:
        train_dataset = NucleusDatasetMask(
            root=CoreSetQC.dataset_root, dbpath=CoreSetQC.dbpath,
            slides=train_slides, **cfg.MaskDatasetConfigs.train_dataset)
    else:
        train_dataset = NucleusDatasetMask(
            root=CoreSetNoQC.dataset_root, dbpath=CoreSetNoQC.dbpath,
            slides=train_slides, **cfg.MaskDatasetConfigs.train_dataset)

    # test set is always the QC'd data
    test_dataset = NucleusDatasetMask(
        root=CoreSetQC.dataset_root, dbpath=CoreSetQC.dbpath,
        slides=test_slides, **cfg.MaskDatasetConfigs.test_dataset)

    # handle class imbalance
    if cfg.MaskRCNNConfigs.handle_class_imbalance:
        del cfg.BaseDatasetConfigs.train_loader['shuffle']
        cfg.BaseDatasetConfigs.train_loader['sampler'] = \
            WeightedRandomSampler(
                weights=train_dataset.fov_weights,
                num_samples=len(train_dataset.fov_weights),
                replacement=cfg.MaskRCNNConfigs.sample_with_replacement,
            )

    # %% --------------------------------------------------------------
    # Train model

    if train:
        trainNucleusModel(
            model=model, checkpoint_path=checkpoint_path,
            data_loader=DataLoader(
                dataset=train_dataset,
                **cfg.MaskDatasetConfigs.train_loader),
            data_loader_test=DataLoader(
                dataset=test_dataset,
                **cfg.MaskDatasetConfigs.test_loader),
            **cfg.MaskRCNNConfigs.training_params)
    elif os.path.exists(checkpoint_path):
        ckpt = load_ckp(checkpoint_path=checkpoint_path, model=model)
        model = ckpt['model']

    # %% --------------------------------------------------------------
    # Visualize some predictions

    min_iou = 0.5
    vis_props = {'linewidth': 0.15, 'text': False}
    maybe_mkdir(opj(model_folder, 'predictions'))

    dataset = test_dataset if vis_test else train_dataset

    # cropper = tvdt.Cropper()
    model.eval()
    model.to('cpu')

    if randomvis:
        tovis = list(np.random.choice(len(dataset), size=(n_vis,)))
    else:
        tovis = list(range(min(n_vis, len(dataset))))

    for imidx, imno in enumerate(tovis):

        # pick one image from the dataset
        imgtensor, target = dataset.__getitem__(imno)
        imname = dataset.rfovids[int(target['image_id'])]
        print(f"predicting image {imidx} of {n_vis}: {imname}")

        # get prediction
        with torch.no_grad():
            output = model([imgtensor.to('cpu')])
        cpu_device = torch.device('cpu')
        output = [
            {k: v.to(cpu_device) for k, v in t.items()} for t in output]
        output = output[0]

        # mTODO?: the cropper does not support sparse masks
        # # crop the prediction to FOV

        # Ignore ambiguous nuclei from matching. Note that the
        # model already filters out anything predicted as ignore_label
        # in inference mode, so we only need to do this for gtruth.
        keep = target['iscrowd'] == 0
        trg_boxes = np.int32(target['boxes'][keep])

        # get true/false positives/negatives
        output_boxes = np.int32(output['boxes'])
        _, TP, FN, FP = map_bboxes_using_hungarian_algorithm(
            bboxes1=trg_boxes, bboxes2=output_boxes, min_iou=min_iou)

        # concat relevant bounding boxes
        relevant_bboxes = np.concatenate((
            output_boxes[TP],
            output_boxes[FP],
            trg_boxes[FN],
        ), axis=0)
        match_colors = \
            [VisConfigs.MATCHING_COLORS['TP']] * len(TP) \
            + [VisConfigs.MATCHING_COLORS['FP']] * len(FP) \
            + [VisConfigs.MATCHING_COLORS['FN']] * len(FN)

        # just to comply with histomicstk default style
        rgtcodes = {
            k: {
                'group': v,
                'color': 'rgb(' + ','.join(
                    str(c) for c in VisConfigs.CATEG_COLORS[v]) + ')',
            }
            for k, v in dataset.rlabelcodes.items()
        }

        # extract contours +/- condensed masks (truth)
        # noinspection PyTupleAssignmentBalance
        _, _, contoursdf_truth = parse_sparse_mask_for_use(
            sparse_mask=np.uint8(target['masks']),
            rgtcodes=rgtcodes,
            labels=target['labels'].tolist(),
        )

        # extract contours +/- condensed masks (prediction)
        output_labels = np.int32(output['labels']).tolist()
        if not model.transform.densify_mask:
            # output mask is sparse
            # noinspection PyTupleAssignmentBalance
            _, _, contoursdf_prediction = parse_sparse_mask_for_use(
                sparse_mask=np.uint8(output['masks'][:, 0, :, :] > 0.5),
                rgtcodes=rgtcodes,
                labels=output_labels,
            )
        else:
            # output mask is already dense
            contoursdf_prediction = get_contours_from_mask(
                MASK=output['masks'].numpy(),
                GTCodes_df=DataFrame.from_records(data=[
                    {
                        'group': rgtcodes[label]['group'],
                        'GT_code': idx + 1,
                        'color': rgtcodes[label]['color'],
                    }
                    for idx, label in enumerate(output_labels)
                ]),
                MIN_SIZE=1,
                get_roi_contour=False,
            )

        # get rgb
        rgb = np.uint8(imgtensor * 255.).transpose(1, 2, 0)

        # visualize bounding boxes and masks
        nperrow = 4
        nrows = 1
        fig, ax = plt.subplots(
            nrows, nperrow, figsize=(5 * nperrow, 5.3 * nrows))

        # just the image
        axis = ax[0]
        axis.imshow(rgb)
        axis.set_title('rgb', fontsize=12)

        # relevant predicted (TP, FP) & true (FN) boxes
        axis = ax[1]
        axis = pu.vis_bbox(
            img=rgb, bbox=relevant_bboxes, ax=axis,
            instance_colors=match_colors, linewidth=1.5,
        )
        axis.set_title('Bboxes detection (TP/FP/FN)', fontsize=12)

        # predicted masks
        axis = ax[2]
        prediction_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_prediction.to_dict(orient='records'),
            **vis_props)
        axis.imshow(prediction_vis)
        axis.set_title('Predicted masks + classif.', fontsize=12)

        # true masks
        axis = ax[3]
        truth_vis = _visualize_annotations_on_rgb(
            rgb=rgb,
            contours_list=contoursdf_truth.to_dict(orient='records'),
            **vis_props)
        axis.imshow(truth_vis)
        axis.set_title('True masks/bboxes + classif.', fontsize=12)

        # plt.show()
        plt.savefig(opj(model_folder, f'predictions/{imno}_{imname}.png'))
        plt.close()
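
# For reference, the TP/FP/FN split computed above via
# map_bboxes_using_hungarian_algorithm() amounts to one-to-one matching
# that maximizes total IoU, then thresholding matches at min_iou. A
# minimal independent sketch of that idea using scipy (an illustration,
# not the project's actual implementation):

def _demo_hungarian_bbox_matching(truth, preds, min_iou=0.5):
    """Return (TP pred idxs, FP pred idxs, FN truth idxs).

    Boxes are float arrays of shape (n, 4) as [xmin, ymin, xmax, ymax].
    """
    import numpy as np
    from scipy.optimize import linear_sum_assignment

    # pairwise IoU matrix, truth x preds
    x1 = np.maximum(truth[:, None, 0], preds[None, :, 0])
    y1 = np.maximum(truth[:, None, 1], preds[None, :, 1])
    x2 = np.minimum(truth[:, None, 2], preds[None, :, 2])
    y2 = np.minimum(truth[:, None, 3], preds[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    a1 = (truth[:, 2] - truth[:, 0]) * (truth[:, 3] - truth[:, 1])
    a2 = (preds[:, 2] - preds[:, 0]) * (preds[:, 3] - preds[:, 1])
    iou = inter / (a1[:, None] + a2[None, :] - inter)

    # hungarian assignment maximizing total IoU; keep matches >= min_iou
    rows, cols = linear_sum_assignment(-iou)
    good = iou[rows, cols] >= min_iou
    tp = cols[good]                                          # matched preds
    fp = np.setdiff1d(np.arange(preds.shape[0]), tp)         # unmatched preds
    fn = np.setdiff1d(np.arange(truth.shape[0]), rows[good])  # missed truth
    return tp, fp, fn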
def annotations_to_contours_no_mask(
        gc, slide_id, MPP=5.0, MAG=None, mode='min_bounding_box',
        bounds=None, idx_for_roi=None, slide_annotations=None,
        element_infos=None, linewidth=0.2, get_rgb=True,
        get_visualization=True, text=True):
    """Process annotations to get RGB and contours without intermediate masks.

    Parameters
    ----------
    gc : object
        girder client object to make requests, for example:
        gc = girder_client.GirderClient(apiUrl=APIURL)
        gc.authenticate(interactive=True)
    slide_id : str
        girder id for item (slide)
    MPP : float or None
        Microns-per-pixel -- best to use this as it is better defined than
        magnification, which is more scanner- or manufacturer-specific.
        An MPP of 0.25 often roughly translates to 40x magnification.
    MAG : float or None
        If you prefer to use whatever magnification is reported in the
        slide. If neither MPP nor MAG is provided, everything is retrieved
        without scaling, at base (scan) magnification.
    mode : str
        This specifies which part of the slide to get the mask from.
        Allowed modes include the following
        - wsi: get scaled up or down version of mask of whole slide
        - min_bounding_box: get minimum box for all annotations in slide
        - manual_bounds: use given ROI bounds provided by the 'bounds' param
        - polygonal_bounds: use the idx_for_roi param to get coordinates
    bounds : dict or None
        if not None, has keys 'XMIN', 'XMAX', 'YMIN', 'YMAX' for slide
        region coordinates (AT BASE MAGNIFICATION) to get the labeled image
        (mask) for. Use this with the 'manual_bounds' run mode.
    idx_for_roi : int
        index of ROI within the element_infos dataframe.
        Use this with the 'polygonal_bounds' run mode.
    slide_annotations : list or None
        Give this parameter to avoid re-getting slide annotations. If you
        do provide the annotations, though, make sure you have used
        scale_slide_annotations() to scale them up or down by sf BEFOREHAND.
    element_infos : pandas DataFrame or None
        The columns annidx and elementidx encode the dict index of the
        annotation document and element, respectively, in the original
        slide_annotations list of dictionaries. This can be obtained by
        the get_bboxes_from_slide_annotations() method. Make sure you have
        used scale_slide_annotations().
    linewidth : float
        visualization line width
    get_rgb : bool
        get rgb image?
    get_visualization : bool
        get overlaid annotation bounds over RGB for visualization
    text : bool
        add text labels to visualization?

    Returns
    -------
    dict
        Results dict containing one or more of the following keys
        - bounds: dict of bounds at scan magnification
        - rgb: (mxnx3 np array) corresponding rgb image
        - contours: list of dicts, one per annotation element
        - visualization: (mxnx3 np array) visualization overlay

    """
    MPP, MAG, mode, bounds, idx_for_roi, get_rgb, get_visualization = \
        _sanity_checks(
            MPP, MAG, mode, bounds, idx_for_roi,
            get_rgb, get_visualization)

    # calculate the scale factor
    sf, appendStr = get_scale_factor_and_appendStr(
        gc=gc, slide_id=slide_id, MPP=MPP, MAG=MAG)

    if slide_annotations is not None:
        assert element_infos is not None, "must also provide element_infos"
    else:
        # get annotations for slide
        slide_annotations = gc.get('/annotation/item/' + slide_id)

        # scale up/down annotations by a factor
        slide_annotations = scale_slide_annotations(slide_annotations, sf=sf)

        # get bounding box information for all annotations -> scaled by sf
        element_infos = get_bboxes_from_slide_annotations(slide_annotations)

    # Determine the region to get, based on the run mode, keeping in mind
    # that it must be in BASE MAGNIFICATION coordinates before it is
    # passed on to get_mask_from_slide()
    bounds = _get_roi_bounds_by_run_mode(
        gc=gc, slide_id=slide_id, mode=mode, bounds=bounds,
        element_infos=element_infos, idx_for_roi=idx_for_roi, sf=sf)

    # only keep relevant elements and get uncropped bounds
    elinfos_roi, uncropped_bounds = _keep_relevant_elements_for_roi(
        element_infos, sf=sf, mode=mode, idx_for_roi=idx_for_roi,
        roiinfo=copy.deepcopy(bounds))

    # find relevant portion from slide annotations to use
    # (with overflowing beyond edge)
    annotations_slice = _trim_slide_annotations_to_roi(
        copy.deepcopy(slide_annotations), elinfos_roi=elinfos_roi)

    # get roi polygon vertices
    rescaled_bounds = {k: int(v * sf) for k, v in bounds.items()}
    if mode == 'polygonal_bounds':
        roi_coords = _get_coords_from_element(copy.deepcopy(
            slide_annotations[
                int(element_infos.loc[idx_for_roi, 'annidx'])
            ]['annotation']['elements'][
                int(element_infos.loc[idx_for_roi, 'elementidx'])
            ]))
        cropping_bounds = None
    else:
        roi_coords = None
        cropping_bounds = rescaled_bounds

    # tabularize the contours
    _, contours_df = parse_slide_annotations_into_tables(
        annotations_slice, cropping_bounds=cropping_bounds,
        cropping_polygon_vertices=roi_coords,
        use_shapely=mode in ('manual_bounds', 'polygonal_bounds'),
    )
    contours_list = contours_df.to_dict(orient='records')

    # Final bounds (relative to slide at base magnification)
    bounds = {k: int(v / sf) for k, v in rescaled_bounds.items()}
    result = dict()

    # get RGB
    if get_rgb:
        getStr = \
            "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" \
            "&encoding=PNG" \
            % (slide_id, bounds['XMIN'], bounds['XMAX'],
               bounds['YMIN'], bounds['YMAX'])
        getStr += appendStr
        resp = gc.get(getStr, jsonResp=False)
        rgb = get_image_from_htk_response(resp)
        result['rgb'] = rgb

    # Assign to results
    result.update({
        'contours': contours_list,
        'bounds': bounds,
    })

    # get visualization of annotations on RGB (this relies on the rgb
    # fetched above, i.e. get_rgb must also be True)
    if get_visualization:
        result['visualization'] = _visualize_annotations_on_rgb(
            rgb=rgb, contours_list=contours_list, linewidth=linewidth,
            text=text)

    return result
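
# A hedged usage sketch for annotations_to_contours_no_mask(). The API
# URL, slide item ID, and bounds below are placeholders, not real values:

def _demo_annotations_to_contours():
    import girder_client

    gc = girder_client.GirderClient(
        apiUrl='https://your-dsa-server/api/v1/')  # placeholder URL
    gc.authenticate(interactive=True)

    out = annotations_to_contours_no_mask(
        gc=gc, slide_id='REPLACE_WITH_ITEM_ID', MPP=5.0,
        mode='manual_bounds',
        bounds={'XMIN': 58000, 'XMAX': 63000,
                'YMIN': 35000, 'YMAX': 39000},
        get_rgb=True, get_visualization=True)

    # out['contours'] is a list of dicts (one per annotation element);
    # out['rgb'] and out['visualization'] are (m, n, 3) uint8 arrays
    return out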