def __init__(self, metadata, eval, remove_white):
    """Build the list of usable regions for a single whole-slide image.

    Args:
        metadata: mapping of region id -> region record. Each record is read
            for 'cnt_xy' and 'perim_xy'; the first record additionally
            supplies 'wsipath' and 'scan_level' for the slide.
        eval: stored as ``self.eval`` and forwarded to
            ``preprocessing.standard_augmentor``. (The name shadows the
            builtin; it is kept so existing callers keep working.)
        remove_white: when true, the points of every region are first
            filtered with ``regiontools.remove_white_region`` against a
            low-resolution mask of the slide.
    """
    self.base_path = Path(__file__).parent

    first_region_id = list(metadata.keys())[0]
    first_region = metadata[first_region_id]
    wsi_path = (self.base_path / first_region['wsipath']).resolve().as_posix()
    self.scan = openslide.OpenSlide(wsi_path)

    if remove_white:
        # get low res. nuclei image/foreground mask
        # This initializer keeps a single slide handle in self.scan; the
        # previous lookup through self.wsis referenced an attribute that is
        # never assigned here.
        scan = self.scan
        x, y = scan.level_dimensions[-1]
        mask = scan.read_region((0, 0), scan.level_count - 1,
                                (x, y)).convert('RGB')
        # find_nuclei runs on a 4x smaller image; the result is resized back
        # to the (x, y) size of the last level afterwards.
        mask = mask.resize((x // 4, y // 4))
        mask = preprocessing.find_nuclei(mask)
        mask = Image.fromarray(mask.astype(np.uint8)).resize((x, y))
        mask = np.asarray(mask)

    params = preprocessing.DotDict({
        'iw': self.scan.level_dimensions[0][0],
        'ih': self.scan.level_dimensions[0][1],
        'tile_w': HR_PATCH_W,
        'tile_h': HR_PATCH_H,
        'scan_level': first_region['scan_level'],
    })

    # build the datalist
    self.datalist = []
    for key in metadata:
        region_obj = metadata[key].copy()

        if remove_white:
            # given points, remove patches that are only white
            # (the counts returned here are superseded by map_points below)
            region_obj['cnt_xy'], _ = regiontools.remove_white_region(
                mask, region_obj['cnt_xy'], params)
            region_obj['perim_xy'], _ = regiontools.remove_white_region(
                mask, region_obj['perim_xy'], params)

        region_obj['cnt_xy'], num_cnt_pts = regiontools.map_points(
            region_obj['cnt_xy'], params)
        region_obj['perim_xy'], num_perim_pts = regiontools.map_points(
            region_obj['perim_xy'], params)

        # Keep only regions that still offer enough sample points.
        if (num_cnt_pts >= HR_NUM_CNT_SAMPLES
                and num_perim_pts >= HR_NUM_PERIM_SAMPLES):
            self.datalist.append(region_obj)

    self.eval = eval

    # augmentation settings
    self.image_aug = preprocessing.standard_augmentor(self.eval)
wsipaths = sorted(glob.glob('{}/A*.svs'.format(args.raw_train_pth))) print(wsipaths) wsipath = wsipaths[0] scan = openslide.OpenSlide(wsipath) filename = os.path.basename(wsipath) 'get actual mask, i.e. the ground truth' xmlpath = '{}/{}.xml'.format(args.raw_train_pth, filename.split('.svs')[0]) gt = getGT(xmlpath, scan, level=args.scan_level) gt_rgb = np.eye(4)[gt][..., 1:] wsi = scan.read_region((0, 0), scan.level_count-1, scan.level_dimensions[scan.level_count-1]).convert('RGB') x_max, y_max = scan.level_dimensions[-1] wsi_mask = preprocessing.find_nuclei(wsi) n_labels, labels, stats, centers = cv2.connectedComponentsWithStats((gt > 0).astype(np.uint8)) centers = centers.astype(np.int) fig = plt.figure() ''' sorted_indices = stats[:, 4].argsort()[::-1] # based on area, largest to smallest stats = stats[sorted_indices, :] centers = centers[sorted_indices, :] ''' ms = 0.5 for tile_id in tqdm(range(1, n_labels)):
def __init__(self, pth, eval, remove_white, duplicate_dataset):
    """Assemble the datalist from patch records and whole-slide regions.

    Args:
        pth: dataset folder name; metadata is fetched from
            '../<pth>/gt.npy' relative to this file's directory.
        eval: stored as ``self.eval`` and forwarded to
            ``preprocessing.standard_augmentor``. When falsy, the class
            ratios are also written to ``args.cls_ratios`` and the datalist
            is repeated ``duplicate_dataset`` times.
        remove_white: when true, region points are first filtered with
            ``regiontools.remove_white_region`` against a low-resolution
            mask of the owning slide.
        duplicate_dataset: how many times each datalist entry is repeated
            (applied only when ``eval`` is falsy).
    """
    self.base_path = Path(__file__).parent
    metadata_file = (self.base_path /
                     '../{}/gt.npy'.format(pth)).resolve().as_posix()
    metadata = ufs.fetch_metadata(metadata_file)

    # Dataset structure: the dataset is comprised of patches + wsi regions.
    #   patches: metadata['P'] indicates where all the patches are.
    #   wsi:
    #     0. metadata[filename / svs file name]
    #     1. m[f][connected component id]
    #     2. m[f][c][region within the connected component]
    # At level 1 we have the connected component as given in the gt mask.
    # At this level m[f][c][0] always points to the large region; if the
    # region is large enough, it is split into smaller sub-regions stored
    # at m[f][c][>=1].

    # build the datalist
    self.datalist = []
    class_counts = np.zeros(args.num_classes, )

    # build patch portion of ds
    if 'P' in metadata:
        patches = copy.deepcopy(metadata['P'][0])
        # Remove the patch entry so the slide loop below sees only slides.
        del metadata['P']

        # Key points are computed once per distinct patch size and reused.
        key_points_by_dims = {}
        for patch_key in patches:
            patch = patches[patch_key]
            dims = patch['dimensions']
            if dims not in key_points_by_dims:
                patch_params = preprocessing.DotDict({
                    'num_center_points': HR_NUM_CNT_SAMPLES,
                    'num_perim_points': HR_NUM_PERIM_SAMPLES,
                    'scan_level': HR_SCAN_LEVEL,
                    'tile_w': HR_PATCH_W,
                    'tile_h': HR_PATCH_H,
                    'dimensions': dims,
                })
                key_points_by_dims[dims] = \
                    regiontools.get_key_points_for_patch(patch_params)

            item = {**patch, **key_points_by_dims[dims]}
            self.datalist.append(item)
            class_counts[item['label']] += 1

    # build wsi regions portion
    self.wsis = {}
    for filename in metadata:
        components = metadata[filename]

        # The first sub-region of the first component supplies the slide
        # path and scan level used for the whole file.
        first_region_id = list(components.keys())[0]
        first_sub_region_id = list(components[first_region_id].keys())[0]
        first_record = components[first_region_id][first_sub_region_id]

        slide_path = (self.base_path /
                      first_record['wsipath']).resolve().as_posix()
        slide = openslide.OpenSlide(slide_path)
        self.wsis[slide_path] = slide

        if remove_white:
            # get low res. nuclei image/foreground mask
            x, y = slide.level_dimensions[-1]
            thumb = slide.read_region((0, 0), slide.level_count - 1,
                                      (x, y)).convert('RGB')
            thumb = thumb.resize((x // 4, y // 4))
            nuclei = preprocessing.find_nuclei(thumb)
            mask = np.asarray(
                Image.fromarray(nuclei.astype(np.uint8)).resize((x, y)))

        slide_params = preprocessing.DotDict({
            'iw': slide.level_dimensions[0][0],
            'ih': slide.level_dimensions[0][1],
            'tile_w': HR_PATCH_W,
            'tile_h': HR_PATCH_H,
            'scan_level': first_record['scan_level'],
        })

        for conncomp in components:
            sub_regions = components[conncomp]
            for sub_id in sub_regions:
                region_obj = sub_regions[sub_id].copy()

                if remove_white:
                    # given points, remove patches that are only white
                    region_obj['cnt_xy'], num_cnt_pts = \
                        regiontools.remove_white_region(
                            mask, region_obj['cnt_xy'], slide_params)
                    region_obj['perim_xy'], num_perim_pts = \
                        regiontools.remove_white_region(
                            mask, region_obj['perim_xy'], slide_params)

                # which points valid for this patch size, scan level combo?
                region_obj['cnt_xy'], num_cnt_pts = regiontools.map_points(
                    region_obj['cnt_xy'], slide_params)
                region_obj['perim_xy'], num_perim_pts = \
                    regiontools.map_points(region_obj['perim_xy'],
                                           slide_params)

                enough_points = (num_cnt_pts >= HR_NUM_CNT_SAMPLES
                                 and num_perim_pts >= HR_NUM_PERIM_SAMPLES)
                if enough_points:
                    self.datalist.append(region_obj)
                    class_counts[region_obj['label']] += 1

    self.eval = eval

    class_counts = np.array(class_counts)
    # Disabled class remapping, kept for reference:
    # cls[0] += cls[1]
    # cls[1] = cls[2]
    # cls[2] = cls[3]
    # cls[3] = 0
    print(class_counts)
    class_counts = class_counts / class_counts.sum()
    print(class_counts)
    if not self.eval:
        args.cls_ratios = class_counts

    # augmentation settings
    self.image_aug = preprocessing.standard_augmentor(self.eval)

    if not self.eval:
        from itertools import chain
        # Repeat every entry duplicate_dataset times, keeping entry order.
        self.datalist = list(chain.from_iterable(
            [entry] * duplicate_dataset for entry in self.datalist))
def __init__(self, wsipth, params):
    """Enumerate the foreground tile positions of one whole-slide image.

    Args:
        wsipth: path of the slide file opened with openslide.
        params: settings object; ``pw``/``ph`` (tile size) and ``sw``/``sh``
            (stride) are read, ``iw``/``ih`` are overwritten with the slide
            dimensions at ``args.scan_level``.

    After construction ``self.datalist`` holds ``(xpos, ypos)`` tuples in
    the pixel grid of ``args.scan_level``. It stays empty, and no mask or
    augmentor is prepared, when the slide has fewer levels than
    ``args.scan_level`` requires.
    """
    self.params = params

    # build the dataset
    self.datalist = []

    # read the wsi scan
    filename = os.path.basename(wsipth)
    self.scan = openslide.OpenSlide(wsipth)

    # if a slide has less levels than our desired scan level, ignore the
    # slide
    last_level = len(self.scan.level_dimensions) - 1
    if last_level < args.scan_level:
        return

    self.params.iw, self.params.ih = self.scan.level_dimensions[
        args.scan_level]

    # The foreground mask is taken at level 2 when the slide has one;
    # slides with fewer levels fall back to their last level so they no
    # longer fail with an out-of-range level index.
    mask_level = min(2, last_level)

    # gt mask
    # find nuclei is slow, hence save masks from preprocessing for later use
    msk_pth = '{}/{}.png'.format(args.wsi_mask_pth, filename)
    if not os.path.exists(msk_pth):
        thmb = self.scan.read_region(
            (0, 0), mask_level,
            self.scan.level_dimensions[mask_level]).convert('RGB')
        mask = preprocessing.find_nuclei(thmb)
        Image.fromarray(mask.astype(np.uint8)).save(msk_pth)
    else:
        # Close the cached file once its pixels have been converted.
        with Image.open(msk_pth) as cached:
            mask = cached.convert('L')
    mask = np.asarray(mask)

    # augmentation settings
    self.image_aug = preprocessing.standard_augmentor(True)

    # downsample multiplier: scan-level coordinates -> mask-level coordinates
    m = (self.scan.level_downsamples[args.scan_level] /
         self.scan.level_downsamples[mask_level])
    dx, dy = int(self.params.pw * m), int(self.params.ph * m)

    def add_if_foreground(xpos, ypos):
        # Record the tile only when its footprint in the mask is foreground.
        yp, xp = int(ypos * m), int(xpos * m)
        if preprocessing.isforeground(mask[yp:yp + dy, xp:xp + dx]):
            self.datalist.append((xpos, ypos))

    x_last = self.params.iw - 1 - self.params.pw
    y_last = self.params.ih - 1 - self.params.ph

    # Regular grid, row by row.
    for ypos in range(1, y_last, self.params.sh):
        for xpos in range(1, x_last, self.params.sw):
            add_if_foreground(xpos, ypos)

    # Extra column flush with the right edge.
    for ypos in range(1, y_last, self.params.sh):
        add_if_foreground(x_last, ypos)

    # Extra row flush with the bottom edge.
    # NOTE(review): the corner tile (x_last, y_last) is visited by neither
    # edge pass - confirm whether that is intentional.
    for xpos in range(1, x_last, self.params.sw):
        add_if_foreground(xpos, y_last)