def __getitem__(self, idx):
    """Build batch ``idx`` of paired RGB / persistence-image inputs.

    For every sample the RGB ``.jpg`` is read, Reinhard color-normalized and
    passed through ``preprocess_resnet``; the matching ``.pkl`` file is
    unpickled and passed through ``self.preprocess_persistence``.

    Parameters
    ----------
    idx : int
        Index of the batch within ``self.train_files``.

    Returns
    -------
    tuple
        ``([X_RGB, X_Per], Y)`` with shapes ``(n, 256, 256, 3)``,
        ``(n, 32, 32, 1)`` and ``(n, 2)``, where ``n`` is the number of
        samples in the batch.

    Raises
    ------
    ValueError
        If a sample's label is neither ``self.label['malignant']`` nor
        ``self.label['benign']``.
    """
    start = idx * self.batch_size
    stop = start + self.batch_size
    batchx = self.train_files[start:stop]
    batchy = self.train_outputs[start:stop]

    # The last batch can be shorter than ``batch_size``; size the arrays
    # from the slice so indexing never runs past its end.
    n_samples = len(batchx)
    X_RGB = np.zeros((n_samples, 256, 256, 3))
    X_Per = np.zeros((n_samples, 32, 32, 1))
    Y = np.zeros((n_samples, 2))

    for i in range(n_samples):
        fnameRGB = batchx[i] + '.jpg'
        fnamePer = batchx[i] + '.pkl'

        if batchy[i] == self.label['malignant']:
            pathRGB = os.path.join(self.path_mal_train, fnameRGB)
            pathPer = os.path.join(self.path_mal_train_per, fnamePer)
        elif batchy[i] == self.label['benign']:
            pathRGB = os.path.join(self.path_ben_train, fnameRGB)
            pathPer = os.path.join(self.path_ben_train_per, fnamePer)
        else:
            # Without this branch the paths of the previous sample would be
            # silently reused (or be undefined for the first sample).
            raise ValueError(
                'unknown label %r for sample %r' % (batchy[i], batchx[i]))

        img = skimage.io.imread(pathRGB)
        if img.shape == (1024, 1024, 3):
            # keep every 4th pixel: 1024x1024 -> 256x256
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", pathRGB)[0]))

        # NOTE(review): membership is tested on self.config['stats'] but the
        # value is read from self.stats -- confirm both refer to the same
        # mapping.
        if image_id in self.config['stats']:
            src_mu, src_sigma = self.stats[image_id]
            img_nmzd = htk_cnorm.reinhard(
                img, self.ref_mu_lab, self.ref_std_lab,
                src_mu=src_mu, src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % (image_id))
            img_nmzd = htk_cnorm.reinhard(
                img, self.ref_mu_lab, self.ref_std_lab).astype('float')

        X_RGB[i] = preprocess_resnet(img_nmzd)

        with open(pathPer, 'rb') as f:
            persistence = pickle.load(f)
        X_Per[i] = self.preprocess_persistence(persistence)

        Y[i] = to_categorical(batchy[i], num_classes=2)

    return ([X_RGB, X_Per], Y)
def detect_nuclei(im_tile, tile_info=None, args=None,
                  src_mu_lab=None, src_sigma_lab=None):
    """Detect nuclei in a tile image and return their annotations.

    Parameters
    ----------
    im_tile : array
        Tile image passed to Reinhard normalization.
    tile_info : optional
        Forwarded to ``cli_utils.create_tile_nuclei_annotations``.
    args : optional
        Parameter object; falls back to the module-level ``default_args``
        when falsy.
    src_mu_lab, src_sigma_lab : optional
        Source statistics forwarded to ``htk_cnorm.reinhard``.

    Returns
    -------
    The annotation list produced by
    ``cli_utils.create_tile_nuclei_annotations``.
    """
    args = args or default_args

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(
        im_tile, args.reference_mu_lab, args.reference_std_lab,
        src_mu=src_mu_lab, src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # channel 0 is used as the nuclear stain; the ``np.float`` alias was
    # removed from NumPy, the builtin ``float`` is the same dtype
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info=tile_info,
        format=args.nuclei_annotation_format)

    return nuclei_annot_list
def detect_nuclei_kofahi(im_input, args):
    """Segment nuclei in ``im_input`` and return a label mask.

    Pipeline: Reinhard normalization, color deconvolution with the stains
    named in ``args``, foreground thresholding, constrained multi-scale LoG
    filtering, local-maximum clustering and removal of small objects.

    Parameters
    ----------
    im_input : array
        Input image passed to Reinhard normalization.
    args :
        Object providing ``reference_mu_lab``, ``reference_std_lab``,
        ``stain_1``..``stain_3``, ``foreground_threshold``, ``min_radius``,
        ``max_radius``, ``local_max_search_radius`` and
        ``min_nucleus_area``.

    Returns
    -------
    array
        Integer label mask of the segmented nuclei.
    """
    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_input,
                                 args.reference_mu_lab,
                                 args.reference_std_lab)

    # perform color deconvolution
    w = np.array([
        stain_color_map[args.stain_1],
        stain_color_map[args.stain_2],
        stain_color_map[args.stain_3],
    ]).T
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # ``np.float`` was removed from NumPy; builtin ``float`` is equivalent
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    # segment foreground (assumes nuclei are darker on a bright background)
    im_nuclei_fgnd_mask = sp.ndimage.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    im_log = htk_shape_filters.clog(
        im_nuclei_stain, im_nuclei_fgnd_mask,
        sigma_min=args.min_radius * np.sqrt(2),
        sigma_max=args.max_radius * np.sqrt(2))

    # apply local maximum clustering; only the label mask is needed, and
    # the third return value is no longer bound to the builtin name ``max``
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_nuclei_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(int)

    return im_nuclei_seg_mask
def detect_tile_nuclei(slide_path, tile_position, args, **it_kwargs):
    """Detect nuclei in one tile of a slide and return their annotations.

    Parameters
    ----------
    slide_path :
        Path handed to ``large_image.getTileSource``.
    tile_position :
        Position of the requested tile, forwarded to ``getSingleTile``.
    args :
        Object providing ``reference_mu_lab``, ``reference_std_lab`` and
        ``nuclei_annotation_format`` (plus whatever the ``cli_utils``
        helpers read from it).
    **it_kwargs :
        Extra tile-iterator arguments forwarded to ``getSingleTile``.

    Returns
    -------
    The annotation list produced by
    ``cli_utils.create_tile_nuclei_annotations``.
    """
    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image (first three channels only)
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # ``np.float`` was removed from NumPy; builtin ``float`` is equivalent
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    return nuclei_annot_list
def setUp(self):
    """Prepare the shared fixture: segment nuclei in ``Easy1.png``.

    Reads the test image, normalizes it, unmixes the stains, segments the
    nuclei and computes nuclei features; the intermediate results are stored
    on ``self`` for use by the tests.
    """
    # define parameters
    args = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
        'min_radius': 12,
        'max_radius': 30,
        'foreground_threshold': 60,
        'min_nucleus_area': 80,
        'local_max_search_radius': 10,
    }
    args = collections.namedtuple('Parameters', args.keys())(**args)

    # read input image
    input_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')
    im_input = skimage.io.imread(input_image_file)[:, :, :3]

    # perform color normalization
    im_input_nmzd = htk_cnorm.reinhard(
        im_input, args.reference_mu_lab, args.reference_std_lab)

    # perform color deconvolution
    w = htk_cdeconv.rgb_separate_stains_macenko_pca(
        im_input_nmzd, im_input_nmzd.max())
    im_stains = htk_cdeconv.color_deconvolution(im_input_nmzd, w).Stains

    # ``np.float`` was removed from NumPy; builtin ``float`` is equivalent
    nuclei_channel = htk_cdeconv.find_stain_index(
        htk_cdeconv.stain_color_map['hematoxylin'], w)
    im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(float)

    cytoplasm_channel = htk_cdeconv.find_stain_index(
        htk_cdeconv.stain_color_map['eosin'], w)
    im_cytoplasm_stain = im_stains[:, :, cytoplasm_channel].astype(float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(
        im_nuclei_stain, args)

    # perform connected component analysis
    nuclei_rprops = skimage.measure.regionprops(im_nuclei_seg_mask)

    # compute nuclei features
    fdata_nuclei = htk_features.compute_nuclei_features(
        im_nuclei_seg_mask, im_nuclei_stain,
        im_cytoplasm=im_cytoplasm_stain)

    self.im_input = im_input
    self.im_input_nmzd = im_input_nmzd
    self.im_nuclei_stain = im_nuclei_stain
    self.im_nuclei_seg_mask = im_nuclei_seg_mask
    self.nuclei_rprops = nuclei_rprops
    self.fdata_nuclei = fdata_nuclei
def set_slide_info_and_get_tissue_mask(self):
    """Populate ``self.slide_info`` and return the labeled tissue mask.

    Raises
    ------
    ValueError
        If the mask holds fewer than two distinct values (no tissue).
    """
    # Persistent dict holding information about the slide
    self.slide_info = self.gc.get('item/%s/tiles' % self.slide_id)

    # Thumbnail from which the tissue mask is derived
    rgb = get_slide_thumbnail(self.gc, self.slide_id)

    # Optional color normalization of the thumbnail
    if 'thumbnail' in self.cnorm_params:
        normalized = reinhard(
            im_src=rgb,
            target_mu=self.cnorm_params['thumbnail']['mu'],
            target_sigma=self.cnorm_params['thumbnail']['sigma'])
        rgb = np.uint8(normalized)

    # Labeled tissue mask -- each unique value is one tissue piece
    labeled, _ = get_tissue_mask(rgb, **self.get_tissue_mask_kwargs)

    n_values = len(np.unique(labeled))
    if n_values < 2:
        raise ValueError("No tissue detected!")

    if self.visualize_tissue_boundary:
        boundary_docs = get_tissue_boundary_annotation_documents(
            self.gc, slide_id=self.slide_id, labeled=labeled)
        for boundary_doc in boundary_docs:
            self.gc.post(
                "/annotation?itemId=" + self.slide_id, json=boundary_doc)

    # Ratio of the slide width to the mask width
    mask_width = labeled.shape[1]
    self.slide_info['F_tissue'] = self.slide_info['sizeX'] / mask_width

    return labeled
def compute_tile_nuclei_features(slide_path, tile_position, args, it_kwargs,
                                 src_mu_lab=None, src_sigma_lab=None):
    """Detect nuclei in one tile and compute their features.

    Parameters
    ----------
    slide_path :
        Path handed to ``large_image.getTileSource``.
    tile_position :
        Position of the requested tile, forwarded to ``getSingleTile``.
    args :
        Object providing the normalization, annotation and feature options
        read below.
    it_kwargs : dict
        Extra tile-iterator arguments forwarded to ``getSingleTile``.
    src_mu_lab, src_sigma_lab : optional
        Source statistics forwarded to ``htk_cnorm.reinhard``.

    Returns
    -------
    tuple
        ``(nuclei_annot_list, fdata)``; the columns of ``fdata`` are
        prefixed with ``'Feature.'``.
    """
    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image (first three channels only)
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(
        im_tile, args.reference_mu_lab, args.reference_std_lab,
        src_mu=src_mu_lab, src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # ``np.float`` was removed from NumPy; builtin ``float`` is equivalent
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    # segment nuclei
    im_nuclei_seg_mask = cli_utils.detect_nuclei_kofahi(im_nuclei_stain, args)

    # generate nuclei annotations
    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    # channel 1 is only used when cytoplasm features are requested
    if args.cytoplasm_features:
        im_cytoplasm_stain = im_stains[:, :, 1].astype(float)
    else:
        im_cytoplasm_stain = None

    # compute nuclei features
    fdata = htk_features.compute_nuclei_features(
        im_nuclei_seg_mask, im_nuclei_stain, im_cytoplasm_stain,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    fdata.columns = ['Feature.' + col for col in fdata.columns]

    return nuclei_annot_list, fdata
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None):
    """Detect nuclei in one tile of a slide and return their annotations.

    Parameters
    ----------
    slide_path :
        Path handed to ``large_image.getTileSource``.
    tile_position :
        Position of the requested tile, forwarded to ``getSingleTile``.
    args :
        Object providing the normalization, segmentation and annotation
        options read below.
    it_kwargs : dict
        Extra tile-iterator arguments forwarded to ``getSingleTile``.
    src_mu_lab, src_sigma_lab : optional
        Source statistics forwarded to ``htk_cnorm.reinhard``.

    Returns
    -------
    list
        Nuclei annotations; empty when the segmentation mask has no
        non-zero pixel.
    """
    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image (first three channels only)
    im_tile = tile_info['tile'][:, :, :3]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(
        im_tile, args.reference_mu_lab, args.reference_std_lab,
        src_mu=src_mu_lab, src_sigma=src_sigma_lab)

    # perform color deconvolution
    w = cli_utils.get_stain_matrix(args)
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # ``np.float`` was removed from NumPy; builtin ``float`` is equivalent
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    # segment nuclear foreground
    im_nuclei_fgnd_mask = im_nuclei_stain < args.foreground_threshold

    # segment nuclei
    im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
        im_nuclei_stain,
        im_nuclei_fgnd_mask,
        args.min_radius,
        args.max_radius,
        args.min_nucleus_area,
        args.local_max_search_radius,
    )

    # delete border nuclei (only when the flag is exactly ``True``)
    if args.ignore_border_nuclei is True:
        im_nuclei_seg_mask = htk_seg_label.delete_border(im_nuclei_seg_mask)

    # generate nuclei annotations only when something was segmented
    nuclei_annot_list = []
    if np.any(im_nuclei_seg_mask):
        nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, args.nuclei_annotation_format)

    return nuclei_annot_list
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev):
    """Extract one RGB patch per SLIC superpixel of a slide tile.

    Parameters
    ----------
    img_path :
        Path handed to ``large_image.getTileSource``.
    tile_position :
        Position of the requested tile, forwarded to ``getSingleTile``.
    wsi_mean, wsi_stddev :
        Source statistics passed positionally to ``htk_cnorm.reinhard``.

    Returns
    -------
    list
        For every superpixel, the slice of the normalized tile delimited by
        the bounds that ``get_patch_bounds`` returns for its centroid.
    """
    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY)

    im_tile = tile_info['tile'][:, :, :3]

    reference_mu_lab = [8.63234435, -0.11501964, 0.03868433]
    reference_std_lab = [0.57506023, 0.10403329, 0.01364062]

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 reference_mu_lab, reference_std_lab,
                                 wsi_mean, wsi_stddev)

    patchSize = 32

    # compute the number of super-pixels; floor division keeps the count an
    # integer on Python 3 (plain ``/`` would yield a float there)
    im_width, im_height = im_nmzd.shape[:2]
    n_superpixels = (im_width // patchSize) * (im_height // patchSize)

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #
    compactness = 50
    im_label = slic(im_nmzd, n_segments=n_superpixels,
                    compactness=compactness) + 1

    # set superpixel data list
    s_data = []

    for props in regionprops(im_label):
        # get x, y centroids for superpixel
        cen_x, cen_y = props.centroid

        # get bounds of superpixel region
        min_row, max_row, min_col, max_col = get_patch_bounds(
            cen_x, cen_y, patchSize, im_width, im_height)

        s_data.append(im_nmzd[min_row:max_row, min_col:max_col])

    return s_data
def preprocess(self, img_path):
    """Read an image, color-normalize it and prepare it for the network.

    Parameters
    ----------
    img_path : str
        Path of the image; it must contain a decimal number
        (``<digits>.<digits>``) whose integer part is used as the image id.

    Returns
    -------
    The result of ``preprocess_resnet`` applied to the normalized image.

    Raises
    ------
    IndexError
        If ``img_path`` contains no decimal number.
    """
    img = skimage.io.imread(img_path)
    if img.shape == (1024, 1024, 3):
        # keep every 4th pixel: 1024x1024 -> 256x256
        img = img[::4, ::4, :]

    # raw string avoids invalid-escape warnings for ``\d`` and ``\.``
    image_id = int(float(re.findall(r"\d+\.\d+", img_path)[0]))

    if image_id in self.stats:
        src_mu, src_sigma = self.stats[image_id]
        img_nmzd = htk_cnorm.reinhard(
            img, self.ref_mu_lab, self.ref_std_lab,
            src_mu=src_mu, src_sigma=src_sigma).astype('float')
    else:
        # single-argument call form works on both Python 2 and 3
        print('#### stats for %d not present' % (image_id))
        img_nmzd = htk_cnorm.reinhard(
            img, self.ref_mu_lab, self.ref_std_lab).astype('float')

    return preprocess_resnet(img_nmzd)
def test_reinhard(self):
    """Check that reinhard output matches the target LAB statistics."""
    # Disabled sanity check kept for reference (normalize to the LAB mean
    # and std of the SAME slide, expecting an almost identical image):
    # mean_lab, std_lab = lab_mean_std(tissue_rgb)
    # tissue_rgb_normalized = reinhard(
    #     tissue_rgb, target_mu=mean_lab, target_sigma=std_lab)
    # assert np.mean(tissue_rgb - tissue_rgb_normalized) < 1

    # color norm. standard (from TCGA-A2-A3XS-DX1, Amgad et al, 2019)
    cnorm = {
        'mu': np.array([8.74108109, -0.12440419, 0.0444982]),
        'sigma': np.array([0.6135447, 0.10989545, 0.0286032]),
    }
    tolerance = [0.1, 0.1, 0.1]

    def check_against_standard(mean_lab, std_lab):
        # every LAB channel must be strictly within tolerance of the target
        assert all(np.abs(mean_lab - cnorm['mu']) < tolerance)
        assert all(np.abs(std_lab - cnorm['sigma']) < tolerance)

    # Unmasked normalization to the preset standard
    unmasked = reinhard(
        cfg.tissue_rgb,
        target_mu=cnorm['mu'], target_sigma=cnorm['sigma'])
    check_against_standard(*lab_mean_std(unmasked))

    # Masked normalization to the preset standard
    masked = reinhard(
        cfg.tissue_rgb,
        target_mu=cnorm['mu'], target_sigma=cnorm['sigma'],
        mask_out=cfg.mask_out)
    check_against_standard(*lab_mean_std(masked, mask_out=cfg.mask_out))
def set_tissue_rgb(self):
    """Fetch the RGB image of this tissue piece and store it on ``self``.

    The region bounded by ``xmin/xmax/ymin/ymax`` is requested from the
    server at magnification ``self.cd.MAG``; if ``self.cd.cnorm_params``
    has a ``'main'`` entry the image is Reinhard-normalized and cast to
    ``uint8``.
    """
    # Region request for this tissue piece at saliency magnification
    region_query = (
        "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d&encoding=PNG"
        % (self.cd.slide_id, self.xmin, self.xmax, self.ymin, self.ymax))
    magnification_query = "&magnification=%d" % self.cd.MAG
    getStr = region_query + magnification_query

    response = self.cd.gc.get(getStr, jsonResp=False)
    self.tissue_rgb = get_image_from_htk_response(response)

    # Color normalization is optional
    if 'main' not in self.cd.cnorm_params:
        return

    normalized = reinhard(
        im_src=self.tissue_rgb,
        target_mu=self.cd.cnorm_params['main']['mu'],
        target_sigma=self.cd.cnorm_params['main']['sigma'])
    self.tissue_rgb = np.uint8(normalized)
def test_normalization(self):
    """Normalizing L1.png to Easy1.png must reproduce Easy1's LAB stats."""
    # load the first three channels of the input and reference images
    im_input, im_reference = [
        skimage.io.imread(os.path.join(TEST_DATA_DIR, fname))[:, :, :3]
        for fname in ('L1.png', 'Easy1.png')
    ]

    # LAB-space mean and stddev of the reference image
    mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

    # normalize the input towards the reference statistics
    im_nmzd = htk_cn.reinhard(im_input, mean_ref, std_ref)

    # LAB-space mean and stddev of the normalized image
    mean_nmzd, std_nmzd = htk_cvt.lab_mean_std(im_nmzd)

    # normalized and reference statistics must agree within tolerance
    for actual, expected in ((mean_nmzd, mean_ref), (std_nmzd, std_ref)):
        np.testing.assert_allclose(actual, expected, atol=1e-1)
def test_normalization(self):
    """Reinhard output must carry the reference image's LAB statistics."""
    input_path = os.path.join(TEST_DATA_DIR, 'L1.png')
    reference_path = os.path.join(TEST_DATA_DIR, 'Easy1.png')

    # keep only the first three channels of each image
    source_rgb = skimage.io.imread(input_path)[:, :, :3]
    reference_rgb = skimage.io.imread(reference_path)[:, :, :3]

    # target statistics come from the reference image (LAB space)
    target_mean, target_std = htk_cvt.lab_mean_std(reference_rgb)

    # color-normalize the input and measure the result
    normalized_rgb = htk_cn.reinhard(source_rgb, target_mean, target_std)
    result_mean, result_std = htk_cvt.lab_mean_std(normalized_rgb)

    # mean and stddev must match the reference within 0.1
    np.testing.assert_allclose(result_mean, target_mean, atol=1e-1)
    np.testing.assert_allclose(result_std, target_std, atol=1e-1)
def color_normalize_unspecified_components(self):
    """Color normalize "true" tissue components.

    Depending on ``self.cdt.color_normalization_method`` the tissue image
    is normalized with ``reinhard`` or ``deconvolution_based_normalization``;
    any other value leaves ``self.tissue_rgb`` untouched.
    """
    method = self.cdt.color_normalization_method

    def outside_not_specified():
        # pixels whose label differs from the "not_specified" GT code are
        # masked out of the normalization
        code = self.cdt.GTcodes.loc["not_specified", "GT_code"]
        return self.labeled != code

    if method == 'reinhard':
        self.cdt._print2(
            "%s: -- reinhard normalization ..." % self.monitorPrefix)
        target = self.cdt.target_stats_reinhard
        self.tissue_rgb = reinhard(
            self.tissue_rgb,
            target_mu=target['mu'],
            target_sigma=target['sigma'],
            mask_out=outside_not_specified())
        return

    if method == 'macenko_pca':
        self.cdt._print2(
            "%s: -- macenko normalization ..." % self.monitorPrefix)
        self.tissue_rgb = deconvolution_based_normalization(
            self.tissue_rgb,
            W_target=self.cdt.target_W_macenko,
            mask_out=outside_not_specified(),
            stain_unmixing_routine_params=(
                self.cdt.stain_unmixing_routine_params))
        return

    self.cdt._print2("%s: -- No normalization!" % self.monitorPrefix)
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev,
                            args, **it_kwargs):
    """Extract patches, centers and boundaries of a tile's superpixels.

    Parameters
    ----------
    img_path :
        Path handed to ``large_image.getTileSource``.
    tile_position :
        Position of the requested tile, forwarded to ``getSingleTile``.
    wsi_mean, wsi_stddev :
        Source statistics passed positionally to ``htk_cnorm.reinhard``.
    args :
        Object providing ``reference_mu_lab``, ``reference_std_lab``,
        ``patchSize`` and ``compactness``.
    **it_kwargs :
        Extra tile-iterator arguments forwarded to ``getSingleTile``.

    Returns
    -------
    tuple
        ``(s_data, x_cent, y_cent, x_brs, y_brs)``: per-superpixel RGB
        patches, centers and boundary coordinates, the latter two scaled by
        ``gwidth / width`` and offset by the tile's ``gy`` / ``gx``.
    """
    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    im_tile = tile_info['tile'][:, :, :3]

    # get global x and y positions
    left = tile_info['gx']
    top = tile_info['gy']

    # get scale
    scale = tile_info['gwidth'] / tile_info['width']

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab,
                                 wsi_mean, wsi_stddev)

    # compute the number of super-pixels; floor division plus ``int`` keeps
    # the count an integer on Python 3 (plain ``/`` yields a float there)
    im_width, im_height = im_nmzd.shape[:2]
    n_superpixels = (int(im_width // args.patchSize) *
                     int(im_height // args.patchSize))

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #
    im_label = slic(im_nmzd, n_segments=n_superpixels,
                    compactness=args.compactness) + 1

    region_props = regionprops(im_label)

    # set superpixel data lists
    s_data = []
    x_cent = []
    y_cent = []
    x_brs = []
    y_brs = []

    for props in region_props:
        # get x, y centroids for superpixel
        cen_x, cen_y = props.centroid

        # get bounds of superpixel region
        min_row, max_row, min_col, max_col = get_patch_bounds(
            cen_x, cen_y, args.patchSize, im_width, im_height)

        # grab superpixel label mask (``np.bool`` was removed from NumPy;
        # builtin ``bool`` is the same dtype)
        lmask = (im_label[:, :] == props.label).astype(bool)

        # embed the mask in a window padded by one pixel on every side
        emask = np.zeros((lmask.shape[0] + 2, lmask.shape[1] + 2),
                         dtype=bool)
        emask[1:-1, 1:-1] = lmask

        # find boundaries
        bx, by = htk_seg.label.trace_object_boundaries(emask)

        with np.errstate(invalid='ignore'):
            # remove redundant points
            mby, mbx = htk_utils.merge_colinear(
                by[0].astype(float), bx[0].astype(float))

        # subtract the one-pixel padding before scaling
        scaled_x = (mbx - 1) * scale
        scaled_y = (mby - 1) * scale

        # get superpixel boundary at highest-res
        x_brs.append(scaled_x + top)
        y_brs.append(scaled_y + left)

        rgb_data = im_nmzd[min_row:max_row, min_col:max_col]
        s_data.append(rgb_data)

        # get superpixel centers at highest-res
        x_cent.append(round((cen_x * scale + top), 1))
        y_cent.append(round((cen_y * scale + left), 1))

    return s_data, x_cent, y_cent, x_brs, y_brs
def test_create_tile_nuclei_annotations(self):
    """Compare bbox and boundary nuclei annotations with ground truth.

    Nuclei are detected in a fixed ROI of a TCGA slide and the resulting
    annotations are checked element by element against the stored
    ground-truth annotation files.
    """
    wsi_path = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs.sha512'  # noqa
        ))

    # define parameters
    args = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],

        'stain_1': 'hematoxylin',
        'stain_2': 'eosin',
        'stain_3': 'null',

        'stain_1_vector': [-1, -1, -1],
        'stain_2_vector': [-1, -1, -1],
        'stain_3_vector': [-1, -1, -1],

        'min_fgnd_frac': 0.50,
        'analysis_mag': 20,
        'analysis_tile_size': 1200,

        'foreground_threshold': 60,
        'min_radius': 6,
        'max_radius': 12,
        'min_nucleus_area': 25,
        'local_max_search_radius': 8,

        # In Python 3 unittesting, the scheduler fails if it uses processes
        'scheduler': 'multithreading',  # None,
        'num_workers': -1,
        'num_threads_per_worker': 1,
    }
    args = collections.namedtuple('Parameters', args.keys())(**args)

    # read WSI
    ts = large_image.getTileSource(wsi_path)
    ts_metadata = ts.getMetadata()

    # largest multiple of the native tile size not above the requested size
    analysis_tile_size = {
        'width': int(ts_metadata['tileWidth'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileWidth'])),
        'height': int(ts_metadata['tileHeight'] * np.floor(
            1.0 * args.analysis_tile_size / ts_metadata['tileHeight'])),
    }

    # define ROI
    roi = {
        'left': ts_metadata['sizeX'] / 2,
        'top': ts_metadata['sizeY'] * 3 / 4,
        'width': analysis_tile_size['width'],
        'height': analysis_tile_size['height'],
        'units': 'base_pixels',
    }

    # define tile iterator parameters
    it_kwargs = {
        'tile_size': {'width': args.analysis_tile_size},
        'scale': {'magnification': args.analysis_mag},
        'region': roi,
    }

    # create dask client
    cli_utils.create_dask_client(args)

    # get tile foreground at low res
    im_fgnd_mask_lres, fgnd_seg_scale = \
        cli_utils.segment_wsi_foreground_at_low_res(ts)

    # compute tile foreground fraction
    tile_fgnd_frac_list = htk_utils.compute_tile_foreground_fraction(
        wsi_path, im_fgnd_mask_lres, fgnd_seg_scale, it_kwargs)

    num_fgnd_tiles = np.count_nonzero(
        tile_fgnd_frac_list >= args.min_fgnd_frac)

    np.testing.assert_equal(num_fgnd_tiles, 2)

    # create nuclei annotations
    nuclei_bbox_annot_list = []
    nuclei_bndry_annot_list = []

    for tile_info in ts.tileIterator(
            format=large_image.tilesource.TILE_FORMAT_NUMPY,
            **it_kwargs):

        im_tile = tile_info['tile'][:, :, :3]

        # perform color normalization
        im_nmzd = htk_cnorm.reinhard(im_tile,
                                     args.reference_mu_lab,
                                     args.reference_std_lab)

        # perform color deconvolution
        w = cli_utils.get_stain_matrix(args)
        im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

        # ``np.float`` was removed from NumPy; builtin ``float`` is
        # equivalent
        im_nuclei_stain = im_stains[:, :, 0].astype(float)

        # segment nuclei
        im_nuclei_seg_mask = htk_nuclear.detect_nuclei_kofahi(
            im_nuclei_stain,
            im_nuclei_stain < args.foreground_threshold,
            args.min_radius,
            args.max_radius,
            args.min_nucleus_area,
            args.local_max_search_radius)

        # generate nuclei annotations as bboxes
        cur_bbox_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'bbox')
        nuclei_bbox_annot_list.extend(cur_bbox_annot_list)

        # generate nuclei annotations as boundaries
        cur_bndry_annot_list = cli_utils.create_tile_nuclei_annotations(
            im_nuclei_seg_mask, tile_info, 'boundary')
        nuclei_bndry_annot_list.extend(cur_bndry_annot_list)

    # compare nuclei bbox annotations with gtruth
    nuclei_bbox_annot_gtruth_file = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot.sha512'  # noqa
        ))

    with open(nuclei_bbox_annot_gtruth_file, 'r') as fbbox_annot:
        nuclei_bbox_annot_list_gtruth = json.load(fbbox_annot)['elements']

    # Check that nuclei_bbox_annot_list is nearly equal to
    # nuclei_bbox_annot_list_gtruth
    assert len(nuclei_bbox_annot_list) == len(
        nuclei_bbox_annot_list_gtruth)
    for pos in range(len(nuclei_bbox_annot_list)):
        np.testing.assert_array_almost_equal(
            nuclei_bbox_annot_list[pos]['center'],
            nuclei_bbox_annot_list_gtruth[pos]['center'], 0)
        np.testing.assert_almost_equal(
            nuclei_bbox_annot_list[pos]['width'],
            nuclei_bbox_annot_list_gtruth[pos]['width'], 1)
        np.testing.assert_almost_equal(
            nuclei_bbox_annot_list[pos]['height'],
            nuclei_bbox_annot_list_gtruth[pos]['height'], 1)

    # compare nuclei boundary annotations with gtruth
    nuclei_bndry_annot_gtruth_file = os.path.join(
        utilities.externaldata(
            'data/TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot.sha512'  # noqa
        ))

    with open(nuclei_bndry_annot_gtruth_file, 'r') as fbndry_annot:
        nuclei_bndry_annot_list_gtruth = json.load(
            fbndry_annot)['elements']

    assert len(nuclei_bndry_annot_list) == len(
        nuclei_bndry_annot_list_gtruth)

    for pos in range(len(nuclei_bndry_annot_list)):
        np.testing.assert_array_almost_equal(
            nuclei_bndry_annot_list[pos]['points'],
            nuclei_bndry_annot_list_gtruth[pos]['points'], 0)
def RGBTestData(config):
    """Load, normalize and preprocess every RGB test image.

    Images are listed from ``<config.test_dir>/malignant/rgb`` and
    ``<config.test_dir>/benign/rgb``, shuffled, Reinhard-normalized and
    passed through ``preprocess_resnet``.

    Parameters
    ----------
    config :
        Object exposing ``test_dir``, ``label.malignant`` and
        ``label.benign``; when ``configs/stats.pkl`` exists the loaded
        stats are also stored under ``config['stats']``.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` has shape ``(n, 256, 256, 3)`` and ``Y`` is
        the list of ``n`` labels (both empty when no image is found).
    """
    print('loading RGB data')
    path_mal_test, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'malignant', 'rgb')))
    path_ben_test, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'rgb')))

    mal_paths_test = glob.glob(os.path.join(path_mal_test, '*'))
    ben_paths_test = glob.glob(os.path.join(path_ben_test, '*'))

    mal_outputs_test = [config.label.malignant] * len(mal_paths_test)
    ben_outputs_test = [config.label.benign] * len(ben_paths_test)

    # ``zip`` is lazy on Python 3 and cannot be shuffled in place, so it is
    # materialized first; unpacking with comprehensions also copes with an
    # empty dataset.
    samples = list(zip(mal_paths_test + ben_paths_test,
                       mal_outputs_test + ben_outputs_test))
    random.shuffle(samples)
    test_paths = [path for path, _ in samples]
    test_outputs = [label for _, label in samples]

    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)

    # Default to "no stats" so the lookup below works when the file is
    # missing instead of failing on an unbound name.
    stats = {}
    if os.path.isfile('configs/stats.pkl'):
        with open('configs/stats.pkl', 'rb') as f:
            stats = pickle.load(f)
        print('################### Stats loaded Test ####################')
        config['stats'] = stats
    else:
        print('No stats file found (To obtain Mu and Sigma from original '
              'whole image).')

    len_test = len(test_outputs)
    X = np.zeros((len_test, 256, 256, 3))
    Y = [-1] * len_test

    for i in range(len_test):
        img = skimage.io.imread(test_paths[i])
        if img.shape == (1024, 1024, 3):
            # keep every 4th pixel: 1024x1024 -> 256x256
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", test_paths[i])[0]))

        if image_id in stats:
            src_mu, src_sigma = stats[image_id]
            img_nmzd = htk_cnorm.reinhard(
                img, ref_mu_lab, ref_std_lab,
                src_mu=src_mu, src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % (image_id))
            img_nmzd = htk_cnorm.reinhard(
                img, ref_mu_lab, ref_std_lab).astype('float')

        X[i] = preprocess_resnet(img_nmzd)
        Y[i] = test_outputs[i]

    return (X, Y)
def detect_nuclei(im_input, min_radius=6, max_radius=10,
                  display_result=False):
    """Detect nuclei centers and radii in an RGB image.

    Parameters
    ----------
    im_input : array
        Input image passed to Reinhard normalization.
    min_radius, max_radius : number
        Radius range; divided by ``sqrt(2)`` to get the sigma range of the
        DoG filter.
    display_result : bool
        When true, plot the input, the nuclear stain, the foreground mask
        and the detections with matplotlib.

    Returns
    -------
    tuple
        ``(nuclei_coord, nuclei_rad)``: peak coordinates restricted to the
        foreground mask, and the DoG sigma at each peak times ``sqrt(2)``.
    """
    # color normalization
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)
    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)

    im_nmzd = htk_cnorm.reinhard(im_input, ref_mu_lab, ref_std_lab)

    # color deconvolution
    w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, 255)

    nuclear_chid = htk_cdeconv.find_stain_index(
        htk_cdeconv.stain_color_map['hematoxylin'], w_est)

    im_nuclei_stain = htk_cdeconv.color_deconvolution(
        im_nmzd, w_est, 255).Stains[:, :, nuclear_chid]

    # segment nuclei foreground
    th = skimage.filters.threshold_li(im_nuclei_stain) * 0.8
    # th = skimage.filters.threshold_otsu(im_nuclei_stain)

    im_fgnd_mask = im_nuclei_stain < th
    im_fgnd_mask = skimage.morphology.opening(
        im_fgnd_mask, skimage.morphology.disk(2))
    im_fgnd_mask = skimage.morphology.closing(
        im_fgnd_mask, skimage.morphology.disk(1))

    # detect nuclei
    im_dog, im_dog_sigma = htk_shape_filters.cdog(
        im_nuclei_stain, im_fgnd_mask,
        sigma_min=min_radius / np.sqrt(2),
        sigma_max=max_radius / np.sqrt(2))

    # NOTE(review): ``min_radius / 2`` is a float on Python 3 -- confirm
    # that ``peak_local_max`` accepts a non-integer ``min_distance``.
    nuclei_coord = skimage.feature.peak_local_max(
        im_dog, min_distance=min_radius / 2, threshold_rel=0.1)

    # keep only the peaks that fall inside the foreground mask
    nuclei_coord = nuclei_coord[
        im_fgnd_mask[nuclei_coord[:, 0], nuclei_coord[:, 1]], :]

    nuclei_rad = np.array([
        im_dog_sigma[nuclei_coord[i, 0], nuclei_coord[i, 1]] * np.sqrt(2)
        for i in range(nuclei_coord.shape[0])
    ])

    # display result
    if display_result:

        # single pre-formatted argument prints identically on Python 2 and 3
        print('Number of nuclei =  %s' % nuclei_coord.shape[0])

        plt.figure(figsize=(30, 20))

        panels = [
            (im_input, 'Input'),
            (im_nuclei_stain, 'Deconv nuclei stain'),
            (im_fgnd_mask, 'Foreground mask'),
        ]
        for position, (image, title) in enumerate(panels, start=1):
            plt.subplot(2, 2, position)
            plt.imshow(image)
            plt.title(title, fontsize=labelsize)
            plt.axis('off')

        plt.subplot(2, 2, 4)
        plt.imshow(im_nmzd)
        plt.plot(nuclei_coord[:, 1], nuclei_coord[:, 0], 'k+')

        for i in range(nuclei_coord.shape[0]):
            cx = nuclei_coord[i, 1]
            cy = nuclei_coord[i, 0]
            r = nuclei_rad[i]

            mcircle = mpatches.Circle((cx, cy), r, color='g', fill=False)
            plt.gca().add_patch(mcircle)

        plt.title('Nuclei detection', fontsize=labelsize)
        plt.axis('off')

        plt.tight_layout()

    return nuclei_coord, nuclei_rad
def CombinedTestData(config):
    """Load paired RGB / persistence-image test data.

    Samples are enumerated from the ``persistence_images`` folders of
    ``<config.test_dir>/malignant`` and ``<config.test_dir>/benign``; for
    each ``.pkl`` the ``.jpg`` of the same base name is read from the
    sibling ``rgb`` folder.

    Parameters
    ----------
    config :
        Object exposing ``test_dir``, ``label`` (indexable by
        ``'malignant'`` / ``'benign'``) and ``trainer.percentile_factor``;
        when ``configs/stats.pkl`` exists the loaded stats are also stored
        under ``config['stats']``.

    Returns
    -------
    list
        ``[X_RGB, X_Per, Y]`` with shapes ``(n, 256, 256, 3)`` and
        ``(n, 32, 32, 1)`` and a list of ``n`` labels.

    Raises
    ------
    ValueError
        If a sample label matches neither configured class.
    """
    print('Loading combined data')
    path_mal_test, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'malignant', 'rgb')))
    path_ben_test, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'rgb')))
    path_mal_test_per, _, _ = next(
        os.walk(
            os.path.join(config.test_dir, 'malignant', 'persistence_images')))
    path_ben_test_per, _, _ = next(
        os.walk(os.path.join(config.test_dir, 'benign', 'persistence_images')))

    label = config.label

    mal_paths_test = glob.glob(os.path.join(path_mal_test_per, '*'))
    ben_paths_test = glob.glob(os.path.join(path_ben_test_per, '*'))

    mal_outputs_test = [label['malignant']] * len(mal_paths_test)
    ben_outputs_test = [label['benign']] * len(ben_paths_test)

    test_paths = mal_paths_test + ben_paths_test
    test_outputs = mal_outputs_test + ben_outputs_test

    # base names without the ``.pkl`` suffix
    test_files = [
        os.path.basename(elem).replace('.pkl', '') for elem in test_paths
    ]

    ref_std_lab = (0.57506023, 0.10403329, 0.01364062)
    ref_mu_lab = (8.63234435, -0.11501964, 0.03868433)

    # Default to "no stats" so the lookup below works when the file is
    # missing instead of failing on an unbound name.
    stats = {}
    if os.path.isfile('configs/stats.pkl'):
        with open('configs/stats.pkl', 'rb') as f:
            stats = pickle.load(f)
        print('Stats loaded')
        config['stats'] = stats
    else:
        print('No stats file found (To obtain Mu and Sigma from original '
              'whole image).')

    len_test = len(test_outputs)
    X_RGB = np.zeros((len_test, 256, 256, 3))
    X_Per = np.zeros((len_test, 32, 32, 1))
    Y = [-1] * len_test

    for i in range(len_test):
        fnameRGB = test_files[i] + '.jpg'
        fnamePer = test_files[i] + '.pkl'

        if test_outputs[i] == label['malignant']:
            pathRGB = os.path.join(path_mal_test, fnameRGB)
            pathPer = os.path.join(path_mal_test_per, fnamePer)
        elif test_outputs[i] == label['benign']:
            pathRGB = os.path.join(path_ben_test, fnameRGB)
            pathPer = os.path.join(path_ben_test_per, fnamePer)
        else:
            # never reuse the previous sample's paths for an unknown label
            raise ValueError(
                'unknown label %r for sample %r'
                % (test_outputs[i], test_files[i]))

        img = skimage.io.imread(pathRGB)
        if img.shape == (1024, 1024, 3):
            # keep every 4th pixel: 1024x1024 -> 256x256
            img = img[::4, ::4, :]

        image_id = int(float(re.findall(r"\d+\.\d+", pathRGB)[0]))

        if image_id in stats:
            src_mu, src_sigma = stats[image_id]
            img_nmzd = htk_cnorm.reinhard(
                img, ref_mu_lab, ref_std_lab,
                src_mu=src_mu, src_sigma=src_sigma).astype('float')
        else:
            print('#### stats for %d not present' % (image_id))
            img_nmzd = htk_cnorm.reinhard(
                img, ref_mu_lab, ref_std_lab).astype('float')

        imgRGB = preprocess_resnet(img_nmzd)

        with open(pathPer, 'rb') as f:
            persistence = pickle.load(f)

        # scale, then move the new leading axis to the end (channel-last)
        persistence = persistence / config.trainer.percentile_factor
        imgPer = np.moveaxis(np.array([persistence]), 0, 2)

        X_RGB[i] = imgRGB
        X_Per[i] = imgPer
        Y[i] = test_outputs[i]

    # single pre-formatted arguments print identically on Python 2 and 3
    print('RGB :  %s' % (X_RGB.shape,))
    print('Per :  %s' % (X_Per.shape,))
    print('len(Y) :  %d' % len(Y))

    return [X_RGB, X_Per, Y]
def main(args):
    """Segment nuclei in an image and write per-nucleus features to HDF5.

    Pipeline: read ``args.inputImageFile`` (first three channels), Reinhard
    normalization, color deconvolution with the stains named by
    ``args.stain_1..3``, nuclei segmentation (foreground threshold, cLoG
    filter, max clustering, small-object removal), then feature extraction.
    The feature table is stored in ``args.outputFile`` under key ``'d1'``.

    Parameters
    ----------
    args : object
        Parsed CLI arguments; the attributes read are visible in the body
        (``inputImageFile``, ``stain_1..3``, ``foreground_threshold``,
        ``min_radius``, ``max_radius``, ``local_max_search_radius``,
        ``min_nucleus_area``, the feature flags and ``outputFile``).
    """
    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # The target statistics are the input image's own LAB mean and stddev.
    mu, sigma = htk_ccvt.lab_mean_std(im_input)
    im_nmzd = htk_cnorm.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    w = np.array([stain_color_map[args.stain_1],
                  stain_color_map[args.stain_2],
                  stain_color_map[args.stain_3]]).T

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # builtin float replaces the np.float alias removed from recent NumPy
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    # NOTE(review): radii are multiplied by sqrt(2) here -- confirm this is
    # the intended radius-to-sigma conversion.
    im_log = htk_shape_filters.clog(im_nuclei_stain, im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    # only the label mask is needed; the other two outputs are discarded
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(int)

    #
    # Perform feature extraction
    #
    print('>> Performing feature extraction')

    im_nuclei = im_stains[:, :, 0]
    im_cytoplasm = im_stains[:, :, 1] if args.cytoplasm_features else None

    df = htk_features.ComputeNucleiFeatures(
        im_nuclei_seg_mask, im_nuclei, im_cytoplasm,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    #
    # Create HDF5 file
    #
    print('>> Writing HDF5 file')

    # the context manager closes the store even if put/read fails
    with pd.HDFStore(args.outputFile) as hdf:
        hdf.put('d1', df, format='table', data_columns=True)
        print('--- Object x Features =  %s' % (hdf['d1'].shape,))
def test_segment_nuclei_kofahi(self):
    """Check detect_nuclei_kofahi against a stored mask and an empty image.

    The input image is Reinhard-normalized to the reference image, stains
    are separated with Macenko PCA, and the hematoxylin channel is
    segmented.  The resulting label mask must match the stored ground truth,
    and a uniform image must produce no nuclei.
    """
    input_image_file = datastore.fetch('Easy1.png')
    ref_image_file = datastore.fetch('L1.png')

    # read input and reference images (first three channels only)
    im_input = skimage.io.imread(input_image_file)[:, :, :3]
    im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

    # get mean and stddev of reference image in lab space
    mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_input, mean_ref, std_ref)

    # perform color deconvolution
    stain_color_map = {
        'hematoxylin': [0.65, 0.70, 0.29],
        'eosin': [0.07, 0.99, 0.11],
        'dab': [0.27, 0.57, 0.78],
        'null': [0.0, 0.0, 0.0]
    }

    w = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, im_nmzd.max())
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    nuclei_channel = htk_cdeconv.find_stain_index(
        stain_color_map['hematoxylin'], w)

    # builtin float replaces the np.float alias removed from recent NumPy
    im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(float)

    # segment nuclei
    im_nuclei_seg_mask = htk_seg.nuclear.detect_nuclei_kofahi(
        im_nuclei_stain,
        im_nuclei_stain < 60,
        min_radius=20,
        max_radius=30,
        min_nucleus_area=80,
        local_max_search_radius=10)

    # one unique value is subtracted from the count before comparing
    num_nuclei = len(np.unique(im_nuclei_seg_mask)) - 1

    # check if segmentation mask matches ground truth
    gtruth_mask_file = datastore.fetch('Easy1_nuclei_seg_kofahi.npy')
    im_gtruth_mask = np.load(gtruth_mask_file)
    num_nuclei_gtruth = len(np.unique(im_gtruth_mask)) - 1

    assert num_nuclei == num_nuclei_gtruth
    np.testing.assert_allclose(im_nuclei_seg_mask, im_gtruth_mask)

    # check no nuclei case
    im_nuclei_seg_mask = htk_seg.nuclear.detect_nuclei_kofahi(
        255 * np.ones_like(im_nuclei_stain),
        np.ones_like(im_nuclei_stain),
        min_radius=20,
        max_radius=30,
        min_nucleus_area=80,
        local_max_search_radius=10)

    num_nuclei = len(np.unique(im_nuclei_seg_mask)) - 1
    assert num_nuclei == 0
def main(args):
    """Segment nuclei in an image and write per-nucleus features to HDF5.

    Same pipeline shape as a standard nuclei feature-extraction CLI: read
    ``args.inputImageFile``, Reinhard-normalize, color-deconvolve with the
    stains named by ``args.stain_1..3``, segment nuclei and compute
    features.  The stain images are subsampled by a factor of two in both
    spatial axes before segmentation and feature extraction.  The feature
    table is stored in ``args.outputFile`` under key ``'d1'``.

    Parameters
    ----------
    args : object
        Parsed CLI arguments; the attributes read are visible in the body.
    """
    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # The target statistics are the input image's own LAB mean and stddev.
    mu, sigma = htk_color_conversion.lab_mean_std(im_input)
    im_nmzd = htk_color_normalization.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    w = np.array([stainColorMap[args.stain_1],
                  stainColorMap[args.stain_2],
                  stainColorMap[args.stain_3]]).T

    im_deconvolved = htk_color_deconvolution.ColorDeconvolution(im_nmzd, w)

    # every second pixel of the first stain channel; builtin float replaces
    # the np.float alias removed from recent NumPy
    im_nuclei_stain = im_deconvolved.Stains[::2, ::2, 0].astype(float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    # NOTE(review): radii are multiplied by sqrt(2) here -- confirm this is
    # the intended radius-to-sigma conversion.
    im_log = htk_shape_filters.clog(im_nuclei_stain, im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    # only the label mask is needed; the other two outputs are discarded
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(int)

    #
    # Perform feature extraction
    #
    print('>> Performing feature extraction')

    im_nuclei = im_deconvolved.Stains[::2, ::2, 0]

    if args.cytoplasm_features:
        im_cytoplasm = im_deconvolved.Stains[::2, ::2, 1]
    else:
        im_cytoplasm = None

    df = htk_features.ComputeNucleiFeatures(
        im_nuclei_seg_mask, im_nuclei, im_cytoplasm,
        fsd_bnd_pts=args.fsd_bnd_pts,
        fsd_freq_bins=args.fsd_freq_bins,
        cyto_width=args.cyto_width,
        num_glcm_levels=args.num_glcm_levels,
        morphometry_features_flag=args.morphometry_features,
        fsd_features_flag=args.fsd_features,
        intensity_features_flag=args.intensity_features,
        gradient_features_flag=args.gradient_features,
    )

    #
    # Create HDF5 file
    #
    print('>> Writing HDF5 file')

    # the context manager closes the store even if put/read fails
    with pd.HDFStore(args.outputFile) as hdf:
        hdf.put('d1', df, format='table', data_columns=True)
        print('--- Object x Features =  %s' % (hdf['d1'].shape,))
def test_segment_nuclei_kofahi(self):
    """Check the cLoG + max-clustering nuclei segmentation against a mask.

    The input image is Reinhard-normalized to the reference image, stains
    are separated with Macenko PCA, and the hematoxylin channel is segmented
    step by step (foreground threshold, cLoG filter, max clustering,
    small-object removal).  Both the object count and the label mask must
    match the stored ground truth.
    """
    input_image_file = os.path.join(TEST_DATA_DIR, 'Easy1.png')
    ref_image_file = os.path.join(TEST_DATA_DIR, 'L1.png')

    # read input and reference images (first three channels only)
    im_input = skimage.io.imread(input_image_file)[:, :, :3]
    im_reference = skimage.io.imread(ref_image_file)[:, :, :3]

    # get mean and stddev of reference image in lab space
    mean_ref, std_ref = htk_cvt.lab_mean_std(im_reference)

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_input, mean_ref, std_ref)

    # perform color deconvolution
    stain_color_map = {
        'hematoxylin': [0.65, 0.70, 0.29],
        'eosin': [0.07, 0.99, 0.11],
        'dab': [0.27, 0.57, 0.78],
        'null': [0.0, 0.0, 0.0]
    }

    w = htk_cdeconv.rgb_separate_stains_macenko_pca(im_nmzd, im_nmzd.max())
    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    nuclei_channel = htk_cdeconv.find_stain_index(
        stain_color_map['hematoxylin'], w)

    # builtin float replaces the np.float alias removed from recent NumPy
    im_nuclei_stain = im_stains[:, :, nuclei_channel].astype(float)

    # segment foreground (assumes nuclei are darker on a bright background)
    im_nuclei_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < 60)

    # run adaptive multi-scale LoG filter; the second output is not needed
    im_log, _ = htk_shape_filters.clog(
        im_nuclei_stain, im_nuclei_fgnd_mask,
        sigma_min=20 / np.sqrt(2), sigma_max=30 / np.sqrt(2))

    # apply local maximum clustering
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_nuclei_fgnd_mask, 10)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, 80).astype(np.uint8)

    # perform connected component analysis
    num_nuclei = len(skimage.measure.regionprops(im_nuclei_seg_mask))

    # check if segmentation mask matches ground truth
    gtruth_mask_file = os.path.join(TEST_DATA_DIR,
                                    'Easy1_nuclei_seg_kofahi_adaptive.npy')
    im_gtruth_mask = np.load(gtruth_mask_file)
    num_nuclei_gtruth = len(skimage.measure.regionprops(im_gtruth_mask))

    assert num_nuclei == num_nuclei_gtruth
    np.testing.assert_allclose(im_nuclei_seg_mask, im_gtruth_mask)
def compute_superpixel_data(img_path, tile_position, wsi_mean, wsi_stddev,
                            args, **it_kwargs):
    """Extract a normalized RGB patch around every SLIC superpixel of a tile.

    Parameters
    ----------
    img_path : str
        Path handed to ``large_image.getTileSource``.
    tile_position : int
        Position of the tile to load, forwarded to ``getSingleTile``.
    wsi_mean, wsi_stddev
        Source statistics passed to the Reinhard normalization as its fourth
        and fifth positional arguments.
    args : object
        Must provide ``reference_mu_lab``, ``reference_std_lab``,
        ``patchSize`` and ``compactness``.
    **it_kwargs
        Extra keyword arguments forwarded to ``getSingleTile``.

    Returns
    -------
    tuple
        ``(s_data, x_cent, y_cent)``: the list of patch arrays and two lists
        with the superpixel centroid coordinates, scaled by
        ``gwidth / width`` and offset by the tile's ``gy`` / ``gx``, rounded
        to one decimal.
    """
    # get slide tile source
    ts = large_image.getTileSource(img_path)

    # get requested tile information
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        resample=True,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    im_tile = tile_info['tile'][:, :, :3]

    # get global x and y positions
    left = tile_info['gx']
    top = tile_info['gy']

    # get scale
    scale = tile_info['gwidth'] / tile_info['width']

    # perform color normalization
    im_nmzd = htk_cnorm.reinhard(im_tile,
                                 args.reference_mu_lab,
                                 args.reference_std_lab,
                                 wsi_mean, wsi_stddev)

    # NOTE: shape[:2] is (rows, cols); the names below keep the original
    # convention, which is applied consistently in the rest of the function.
    im_width, im_height = im_nmzd.shape[:2]

    # Number of whole patches that fit along each axis.  Floor division keeps
    # this an integer count regardless of the division mode in effect.
    n_superpixels = int((im_width // args.patchSize) *
                        (im_height // args.patchSize))

    #
    # Generate labels using a superpixel algorithm (SLIC)
    # In SLIC, compactness controls image space proximity.
    # Higher compactness will make the shape of superpixels more square.
    #
    im_label = slic(im_nmzd, n_segments=n_superpixels,
                    compactness=args.compactness) + 1

    # set superpixel data list
    s_data = []
    x_cent = []
    y_cent = []

    for region in regionprops(im_label):
        # centroid of the superpixel (first/second array axis)
        cen_x, cen_y = region.centroid

        # get bounds of the patch around the superpixel
        min_row, max_row, min_col, max_col = get_patch_bounds(
            cen_x, cen_y, args.patchSize, im_width, im_height)

        s_data.append(im_nmzd[min_row:max_row, min_col:max_col])

        # get superpixel centers at highest-res
        x_cent.append(round((cen_x * scale + top), 1))
        y_cent.append(round((cen_y * scale + left), 1))

    return s_data, x_cent, y_cent
def main(args):
    """Segment nuclei and write a label mask plus bounding-box annotations.

    Pipeline: read ``args.inputImageFile`` (first three channels), Reinhard
    normalization, color deconvolution with the stains named by
    ``args.stain_1..3``, nuclei segmentation (foreground threshold, cLoG
    filter, max clustering, small-object removal).  The label mask is saved
    to ``args.outputNucleiMaskFile`` and one rectangle element per nucleus
    is written as JSON to ``args.outputNucleiAnnotationFile``.

    Parameters
    ----------
    args : object
        Parsed CLI arguments; the attributes read are visible in the body.
    """
    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # The target statistics are the input image's own LAB mean and stddev.
    mu, sigma = htk_color_conversion.lab_mean_std(im_input)
    im_nmzd = htk_color_normalization.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    w = np.array([stainColorMap[args.stain_1],
                  stainColorMap[args.stain_2],
                  stainColorMap[args.stain_3]]).T

    im_deconvolved = htk_color_deconvolution.ColorDeconvolution(im_nmzd, w)

    # builtin float replaces the np.float alias removed from recent NumPy
    im_nuclei_stain = im_deconvolved.Stains[:, :, 0].astype(float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    # NOTE(review): radii are multiplied by sqrt(2) here -- confirm this is
    # the intended radius-to-sigma conversion.
    im_log = htk_shape_filters.clog(im_nuclei_stain, im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    # only the label mask is needed; the other two outputs are discarded
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(int)

    #
    # Generate annotations
    #
    obj_props = skimage.measure.regionprops(im_nuclei_seg_mask)

    print('Number of nuclei =  %d' % len(obj_props))

    # create basic schema
    annotation = {
        "name": "Nuclei",
        "description": "Nuclei bounding boxes from a segmentation algorithm",
        "attributes": {
            "algorithm": {
                "color_normalization": "reinhard",
                "color_deconvolution": "ColorDeconvolution",
                "nuclei_segmentation": ["cLOG",
                                        "MaxClustering",
                                        "FilterLabel"]
            }
        },
        "elements": []
    }

    # add each nucleus as an element into the annotation schema
    for props in obj_props:
        # centroid is indexed [1], [0] so the center is written column first
        c = [props.centroid[1], props.centroid[0], 0]

        # NOTE(review): 1 is added to each bbox extent -- confirm this is
        # intended given how regionprops defines the bbox upper bound.
        width = props.bbox[3] - props.bbox[1] + 1
        height = props.bbox[2] - props.bbox[0] + 1

        annotation["elements"].append({
            "type": "rectangle",
            "center": c,
            "width": width,
            "height": height,
            "rotation": 0,
            "fillColor": "rgba(255, 255, 255, 0)",
            "lineWidth": 2,
            "lineColor": "rgb(34, 139, 34)"
        })

    #
    # Save output segmentation mask
    #
    print('>> Outputting nuclei segmentation mask')

    skimage.io.imsave(args.outputNucleiMaskFile, im_nuclei_seg_mask)

    #
    # Save output annotation
    #
    print('>> Outputting nuclei annotation')

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)
def test_reinhard(self):
    """Test reinhard.

    Normalizes a low-magnification RGB image of the sample slide to the
    preset color standard ``cnorm``, first on the whole image and then with
    non-tissue pixels masked out, and checks each time that the LAB mean and
    standard deviation of the result are within 0.1 of the standard.
    """
    def check_matches_standard(mean_lab, std_lab):
        # per-channel absolute deviation from the preset standard
        tolerance = [0.1, 0.1, 0.1]
        self.assertTrue(all(
            np.abs(mean_lab - cnorm['mu']) < tolerance))
        self.assertTrue(all(
            np.abs(std_lab - cnorm['sigma']) < tolerance))

    # fetch the whole slide as an RGB image at a small magnification
    slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
    region_query = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
        SAMPLE_SLIDE_ID,
        0, slide_info['sizeX'],
        0, slide_info['sizeY']
    )
    region_query = region_query + "&magnification=%.2f" % MAG
    response = gc.get(region_query, jsonResp=False)
    tissue_rgb = get_image_from_htk_response(response)

    # # SANITY CHECK! normalize to LAB mean and std from SAME slide
    # mean_lab, std_lab = lab_mean_std(tissue_rgb)
    # tissue_rgb_normalized = reinhard(
    #     tissue_rgb, target_mu=mean_lab, target_sigma=std_lab)
    #
    # # we expect the images to be (almost) exactly the same
    # assert np.mean(tissue_rgb - tissue_rgb_normalized) < 1

    # ---- unmasked normalization to the preset color standard ----
    normalized = reinhard(
        tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'])
    check_matches_standard(*lab_mean_std(normalized))

    # ---- tissue mask from the slide thumbnail ----
    thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
    labeled, mask = get_tissue_mask(
        thumbnail_rgb, deconvolve_first=True,
        n_thresholding_steps=1, sigma=1.5, min_size=30)

    # # visualize result
    # vals = np.random.rand(256, 3)
    # vals[0, ...] = [0.9, 0.9, 0.9]
    # cMap = ListedColormap(1 - vals)
    #
    # f, ax = plt.subplots(1, 3, figsize=(20, 20))
    # ax[0].imshow(thumbnail_rgb)
    # ax[1].imshow(labeled, cmap=cMap)
    # ax[2].imshow(mask, cmap=cMap)
    # plt.show()

    # ---- masked normalization to the preset color standard ----
    # label 0 is resized (order 0) to the RGB image's size and excluded
    unlabeled = resize(
        labeled == 0, output_shape=tissue_rgb.shape[:2],
        order=0, preserve_range=True)
    mask_out = unlabeled == 1

    normalized = reinhard(
        tissue_rgb, target_mu=cnorm['mu'], target_sigma=cnorm['sigma'],
        mask_out=mask_out)
    check_matches_standard(*lab_mean_std(normalized, mask_out=mask_out))
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):
    """Detect nuclei in one slide tile with a Faster R-CNN model.

    The tile is loaded, Reinhard-normalized and color-deconvolved; the two
    stain channels are fused with a per-pixel minimum and the fused image is
    fed (replicated to three channels) to the detection model restored from
    ``CKPT_DIR``.  Detections are converted to nuclei annotations.

    Parameters
    ----------
    slide_path : str
        Path handed to ``large_image.getTileSource``.
    tile_position : int
        Position of the tile to load, forwarded to ``getSingleTile``.
    args : object
        Must provide ``deconv_method`` (``'ruifrok'`` or ``'macenko'``),
        ``stain_1``, ``stain_2``, ``max_det``, ``min_prob``,
        ``ignore_border_nuclei``, ``analysis_tile_size`` and
        ``nuclei_annotation_format``.
    it_kwargs : dict
        Extra keyword arguments forwarded to ``getSingleTile``.
    src_mu_lab, src_sigma_lab : optional
        Source statistics forwarded to the Reinhard normalization.
    debug : bool, optional
        Not used by this function.

    Returns
    -------
    dict
        Timings (seconds, rounded to 3 decimals), the tile shape, the sorted
        predictions and the annotation / analysis lists.

    Raises
    ------
    ValueError
        If ``args.deconv_method`` is not a supported method.
    """
    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    # every key starts as its own empty list and is overwritten below
    csv_dict = {key: [] for key in (
        'PreparationTime',
        'ColorDeconvTime',
        'TotalTileLoadingTime',
        'CKPTLoadingTime',
        'ModelInfernceTime',
        'DetectionTime',
        'ROIShape',
        'ObjectsDict',
        'NumObjects',
        'AnnotationWritingTime',
        'AnnotationDict',
        'AnalysisDict',
    )}

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # perform color deconvolution (builtin float replaces the np.float alias
    # removed from recent NumPy)
    if args.deconv_method == 'ruifrok':
        w = cli_utils.get_stain_matrix(args)
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(float)[:, :, :2]
    elif args.deconv_method == 'macenko':
        # this branch works on the raw tile, not the normalized image
        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]
    else:
        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================
    # compute nuclear foreground mask (computed but not consumed below)
    im_fgnd_mask_stain_1 = im_stains[
        :, :, 0] < threshold_yen(im_stains[:, :, 0])
    im_fgnd_mask_stain_2 = im_stains[
        :, :, 1] < threshold_yen(im_stains[:, :, 1])
    im_fgnd_seg_mask = im_fgnd_mask_stain_1 | im_fgnd_mask_stain_2

    # detector input: per-pixel minimum over the two stain channels
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')

    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclie Detection Deep Learning Block ==================
    # =========================================================================
    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob

    # replicate the single-channel image to three channels for the model
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    tf.reset_default_graph()

    dataset_class = get_dataset('object_detection')
    model_class = get_model('fasterrcnn')
    # NOTE(review): `dataset` is not used below; the construction is kept in
    # case it has side effects -- confirm and remove if not.
    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    # A new graph and session are created on every call; close the session
    # when done so its resources are released instead of accumulating.
    try:
        with graph.as_default():
            image_placeholder = tf.placeholder(
                tf.float32, (None, None, 3), name='Input_Placeholder'
            )
            pred_dict = model(image_placeholder)

            ckpt_loading_start_time = time.time()

            saver = tf.train.Saver(sharded=True, allow_empty=True)
            saver.restore(session, CKPT_DIR)
            tf.logging.info('Loaded checkpoint.')

            ckpt_loading_time = time.time() - ckpt_loading_start_time
            csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

            inference_start_time = time.time()

            cls_prediction = pred_dict['classification_prediction']
            fetches = {
                'objects': cls_prediction['objects'],
                'labels': cls_prediction['labels'],
                'probs': cls_prediction['probs'],
            }

            fetched = session.run(fetches, feed_dict={
                image_placeholder: np.array(im_nuc_det_input)
            })

            inference_time = time.time() - inference_start_time
            csv_dict['ModelInfernceTime'] = round(inference_time, 3)
    finally:
        session.close()

    objects = fetched['objects']
    labels = fetched['labels'].tolist()
    probs = fetched['probs'].tolist()

    # Cast to int to consistently return the same type in Python 2 and 3
    objects = [
        [int(round(coord)) for coord in obj]
        for obj in objects.tolist()
    ]

    # highest-probability detections first
    predictions = sorted([
        {
            'bbox': obj,
            'label': label,
            'prob': round(prob, 4),
        } for obj, label, prob in zip(objects, labels, probs)
    ], key=lambda x: x['prob'], reverse=True)

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))

    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================
    start_time = time.time()

    objects_df = pd.DataFrame(objects)

    formatted_annot_list, formatter_analysis_list = \
        cli_utils.convert_preds_to_utilformat(
            objects_df,
            probs,
            args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)

    csv_dict['AnnotationDict'] = nuclei_annot_list
    csv_dict['AnalysisDict'] = formatter_analysis_list

    anot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(anot_time, 3)

    return csv_dict
def main(args):
    """Segment nuclei and write a label mask plus bounding-box annotations.

    Pipeline: read ``args.inputImageFile`` (first three channels), Reinhard
    normalization, color deconvolution with the stains named by
    ``args.stain_1..3``, nuclei segmentation (foreground threshold, cLoG
    filter, max clustering, small-object removal).  The label mask is saved
    to ``args.outputNucleiMaskFile`` and one rectangle element per nucleus
    is written as JSON to ``args.outputNucleiAnnotationFile``.

    Parameters
    ----------
    args : object
        Parsed CLI arguments; the attributes read are visible in the body.
    """
    #
    # Read Input Image
    #
    print('>> Reading input image')

    im_input = skimage.io.imread(args.inputImageFile)[:, :, :3]

    #
    # Perform color normalization
    #
    print('>> Performing color normalization')

    # The target statistics are the input image's own LAB mean and stddev.
    mu, sigma = htk_ccvt.lab_mean_std(im_input)
    im_nmzd = htk_cnorm.reinhard(im_input, mu, sigma)

    #
    # Perform color deconvolution
    #
    print('>> Performing color deconvolution')

    w = np.array([stain_color_map[args.stain_1],
                  stain_color_map[args.stain_2],
                  stain_color_map[args.stain_3]]).T

    im_stains = htk_cdeconv.color_deconvolution(im_nmzd, w).Stains

    # builtin float replaces the np.float alias removed from recent NumPy
    im_nuclei_stain = im_stains[:, :, 0].astype(float)

    #
    # Perform nuclei segmentation
    #
    print('>> Performing nuclei segmentation')

    # segment foreground
    im_fgnd_mask = sp.ndimage.morphology.binary_fill_holes(
        im_nuclei_stain < args.foreground_threshold)

    # run adaptive multi-scale LoG filter
    # NOTE(review): radii are multiplied by sqrt(2) here -- confirm this is
    # the intended radius-to-sigma conversion.
    im_log = htk_shape_filters.clog(im_nuclei_stain, im_fgnd_mask,
                                    sigma_min=args.min_radius * np.sqrt(2),
                                    sigma_max=args.max_radius * np.sqrt(2))

    # only the label mask is needed; the other two outputs are discarded
    # (this also avoids rebinding the builtin name `max`)
    im_nuclei_seg_mask, _, _ = htk_seg.nuclear.max_clustering(
        im_log, im_fgnd_mask, args.local_max_search_radius)

    # filter out small objects
    im_nuclei_seg_mask = htk_seg.label.area_open(
        im_nuclei_seg_mask, args.min_nucleus_area).astype(int)

    #
    # Generate annotations
    #
    obj_props = skimage.measure.regionprops(im_nuclei_seg_mask)

    print('Number of nuclei =  %d' % len(obj_props))

    # create basic schema
    annotation = {
        "name": "Nuclei",
        "description": "Nuclei bounding boxes from a segmentation algorithm",
        "attributes": {
            "algorithm": {
                "color_normalization": "reinhard",
                "color_deconvolution": "ColorDeconvolution",
                "nuclei_segmentation": ["cLOG",
                                        "MaxClustering",
                                        "FilterLabel"]
            }
        },
        "elements": []
    }

    # add each nucleus as an element into the annotation schema
    for props in obj_props:
        # centroid is indexed [1], [0] so the center is written column first
        c = [props.centroid[1], props.centroid[0], 0]

        # NOTE(review): 1 is added to each bbox extent -- confirm this is
        # intended given how regionprops defines the bbox upper bound.
        width = props.bbox[3] - props.bbox[1] + 1
        height = props.bbox[2] - props.bbox[0] + 1

        annotation["elements"].append({
            "type": "rectangle",
            "center": c,
            "width": width,
            "height": height,
            "rotation": 0,
            "fillColor": "rgba(255, 255, 255, 0)",
            "lineWidth": 2,
            "lineColor": "rgb(34, 139, 34)"
        })

    #
    # Save output segmentation mask
    #
    print('>> Outputting nuclei segmentation mask')

    skimage.io.imsave(args.outputNucleiMaskFile, im_nuclei_seg_mask)

    #
    # Save output annotation
    #
    print('>> Outputting nuclei annotation')

    with open(args.outputNucleiAnnotationFile, 'w') as annotation_file:
        json.dump(annotation, annotation_file, indent=2, sort_keys=False)
def grid_tiling(gc, item_id, group_names, save_dir, save_mag=None,
                mask_mag=1.25, tile_size=(224, 224), tissue_threshold=0.3,
                annotation_threshold=0.15, random_seed=64, is_test=False,
                oversample_background=2.0, reinhard_stats=None):
    """Split a DSA image item (WSI) into smaller images and save locally
    grouped by annotations.

    This approach grids the image into equal sized small images, or tiles
    (i.e. a grid is placed over the WSI starting at the top left corner). At
    the bottom and right edge of the WSI the tiles are ignored if not of
    correct size (the case where the WSI dimensions are not a multiple factor
    of the tile size). A list of annotation group names is needed to group
    the tiles into classes of images saved in their own directories. Tiles
    with no tissue detected are ignored and tiles not containing annotations
    (but have tissue) are by default saved into the background class. A
    background annotation group will cause issues, so avoid having this
    annotation group name.

    Tiles can be saved at a lower magnification than the source image if
    needed (param: save_mag). Note that the tile size specified should be the
    tile size at the save magnification, not the source magnification. Images
    saved will be of the tile size specified in parameters, regardless of the
    save_mag used.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated client
    item_id : str
        DSA image item id
    group_names : list
        list of annotation group names
    save_dir : str
        directory to create group directories with images -
        save_dir / group_name_1, save_dir / background, etc.
    save_mag : float (optional)
        magnification to use when saving the images, if None then source
        magnification will be used.
    mask_mag : float (optional)
        magnification to create binary mask of tissue and annotations. Note
        that if your annotations are very small it will benefit to use a
        larger value than default here, but binary masks will fail to create
        at very high magnifications.
    tile_size : tuple (optional)
        size (width, height) to save tiles at, note that this is the size it
        will be saved at regardless of the magnification used to save the
        images at (i.e. if save_mag is 4 times less than the source
        magnification then the actual tile_size will represent 4 times the
        pixels at full resolution).
    tissue_threshold : float (optional)
        from 0 to 1, fraction of the tile that must contain tissue to be
        included
    annotation_threshold : float (optional)
        from 0 to 1, fraction of the tile that must contain annotation (per
        group) to be labeled as annotation. Note that a single tile may be
        saved twice, representing multiple classes.
    random_seed : int (optional)
        random seed to use when shuffling the background regions
    is_test : bool (optional)
        if True then all the background regions will be saved, otherwise
        oversample_background will be used to determine how many background
        regions to save
    oversample_background : float (optional)
        factor to oversample background class images, compared to the number
        of images of the annotation class with the most images saved
    reinhard_stats : dict (optional)
        if not None then every saved image is additionally color augmented
        with the Reinhard color normalization method, once per entry of this
        dict. Each value must be a dict with 'mu' and 'sigma' keys holding
        the target statistics; the dict keys themselves are not used. The
        source statistics for this image are obtained with
        reinhard_color_stats.

    """
    im_info = gc.get('item/{}/tiles'.format(item_id))
    if save_mag is None:
        save_mag = im_info['magnification']

    # source color stats are only needed (and fetched) for augmentation
    mu = sigma = None
    if reinhard_stats is not None:
        mu, sigma = reinhard_color_stats(gc, item_id)

    # ----- prep work ----- #
    filename = splitext(gc.getItem(item_id)['name'])[0]

    # create dirs for each image class to save
    group_dirs = [join(save_dir, group_name) for group_name in group_names]
    for group_dir in group_dirs:
        makedirs(group_dir, exist_ok=True)
    background_dir = join(save_dir, 'background')
    makedirs(background_dir, exist_ok=True)

    def _save_tile(out_dir, left, top, image):
        # Write the tile and, when requested, one Reinhard-normalized copy
        # per target in reinhard_stats.
        imwrite(
            join(out_dir, '{}_x_{}_y_{}.png'.format(filename, left, top)),
            image)
        if reinhard_stats is None:
            return
        for j, target in enumerate(reinhard_stats.values()):
            im_norm = reinhard(image.copy(), target['mu'], target['sigma'],
                               src_mu=mu, src_sigma=sigma)
            imwrite(
                join(out_dir,
                     '{}_x_{}_y_{}_norm_{}.png'.format(
                         filename, left, top, j)),
                im_norm)

    # get image annotations
    annotations = gc.get('/annotation/item/' + item_id)

    # create a dataframe to use with annotation to mask handler functions
    # (gt codes); group i is assigned GT_code i + 1
    gt_data = [[group_name, 1, i + 1, 0, 0, 'rgb(0, 0, {})'.format(i), '']
               for i, group_name in enumerate(group_names)]
    gt_codes = pd.DataFrame(columns=[
        'group', 'overlay_order', 'GT_code', 'is_roi',
        'is_background_class', 'color', 'comments'
    ], data=gt_data, index=range(len(group_names)))
    gt_codes.index = gt_codes.loc[:, 'group']

    # get binary masks - tissue mask and annotation(s) mask
    mask_mag_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=mask_mag)
    # - scaling the annotations to lower magnification
    mask_annotations = scale_slide_annotations(deepcopy(annotations),
                                               sf=mask_mag_factor)

    # - binary masks are for the whole image at low resolution, function
    # - returns also the RGB image which we use for getting the tissue mask
    mask_element_info = get_bboxes_from_slide_annotations(mask_annotations)
    get_kwargs = deepcopy(GET_KWARGS)  # avoid mutating the global variable
    get_kwargs['gc'] = gc
    get_kwargs['slide_id'] = item_id
    get_kwargs['GTCodes_dict'] = gt_codes.T.to_dict()
    get_kwargs['bounds'] = None
    get_kwargs['MAG'] = mask_mag
    ann_mask_and_image = get_image_and_mask_from_slide(
        mode='wsi', slide_annotations=mask_annotations,
        element_infos=mask_element_info, **get_kwargs)
    tissue_mask = get_tissue_mask(ann_mask_and_image['rgb'])[0]

    # convert the annotations to lower magnification
    fr_to_lr_factor, _ = get_scale_factor_and_appendStr(
        gc=gc, slide_id=item_id, MAG=save_mag)
    annotations = scale_slide_annotations(annotations, sf=fr_to_lr_factor)
    lr_element_info = get_bboxes_from_slide_annotations(annotations)

    # get full resolution information for image
    fr_mag = im_info['magnification']
    fr_width = im_info['sizeX']
    fr_height = im_info['sizeY']
    # tile size at full resolution
    tile_w = int(tile_size[0] / fr_to_lr_factor)
    tile_h = int(tile_size[1] / fr_to_lr_factor)

    # change the get_kwargs to save magnification
    get_kwargs['MAG'] = save_mag

    # ----- loop through image at full res ----- #
    group_annotation_counts = [0] * len(group_names)
    background_regions = []
    # the range bounds skip tiles that would go over the edge of the image
    for x in range(0, fr_width - tile_w + 1, tile_w):
        for y in range(0, fr_height - tile_h + 1, tile_h):
            # tile footprint in the low-resolution masks
            rows = slice(int(y * mask_mag / fr_mag),
                         int((y + tile_h) * mask_mag / fr_mag))
            cols = slice(int(x * mask_mag / fr_mag),
                         int((x + tile_w) * mask_mag / fr_mag))

            # check tile for tissue, using the binary mask for tissue
            tissue_tile = tissue_mask[rows, cols]

            # a tile smaller than one mask pixel yields an empty slice; it
            # cannot be assessed, so skip it instead of dividing by zero
            if tissue_tile.size == 0:
                continue

            # skip if tile does not contain enough tissue
            if (np.count_nonzero(tissue_tile) / tissue_tile.size
                    < tissue_threshold):
                continue

            # check tile for annotations, using the annotation mask
            annotation_tile = ann_mask_and_image['ROI'][rows, cols]

            # tile is background if no annotation is present (of any group)
            background_flag = True

            # - check for each annotation group
            for i, group_name in enumerate(group_names):
                group_annotation_tile = annotation_tile == i + 1

                # group is ignored if the tile holds too little of it
                if (np.count_nonzero(group_annotation_tile)
                        / group_annotation_tile.size
                        < annotation_threshold):
                    continue

                background_flag = False
                group_annotation_counts[i] += 1

                # get annotation image and save it
                get_kwargs['bounds'] = {
                    'XMIN': x, 'XMAX': x + tile_w,
                    'YMIN': y, 'YMAX': y + tile_h
                }

                annotation_im = get_image_and_mask_from_slide(
                    mode='manual_bounds', slide_annotations=annotations,
                    element_infos=lr_element_info, **get_kwargs)['rgb']

                # save the image (and augmentations) to correct directory
                _save_tile(group_dirs[i], x, y, annotation_im)

            if background_flag:
                # save coordinates of background image candidates
                background_regions.append({
                    'magnification': save_mag,
                    'left': x, 'top': y,
                    'width': tile_w, 'height': tile_h
                })

    # randomly select background class coordinates
    # - oversample the background class by a factor of the most represented
    #   annotation class
    Random(random_seed).shuffle(background_regions)
    if not is_test:
        # with no groups there is no class to oversample against: keep none
        most_represented = max(group_annotation_counts or [0])
        background_regions = background_regions[
            :int(oversample_background * most_represented)]

    for region in background_regions:
        tile_im = get_region_im(gc, item_id, region)[:, :, :3]

        # save background image (and augmentations)
        _save_tile(background_dir, region['left'], region['top'], tile_im)