def test_patch_bk_ratio():
    img_path = os.path.join(PRJ_PATH, "test/data/Images/3c32efd9.png")
    img = io.imread(img_path)
    bk_ratio = patch.patch_bk_ratio(img, bk_thresh=0.80)
    if bk_ratio > 1 or bk_ratio < 0:
        raise AssertionError("Background ratio {} is not in the [0, 1] range.".format(bk_ratio))
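
# A hypothetical follow-up test sketch (not in the original suite): it feeds
# synthetic all-white and all-black patches to patch.patch_bk_ratio, assuming
# the ratio means "fraction of bright/background-like pixels", so the white
# patch should score at least as high as the black one and both stay in [0, 1].
def test_patch_bk_ratio_synthetic():
    white_img = np.full((256, 256, 3), 255, dtype=np.uint8)
    black_img = np.zeros((256, 256, 3), dtype=np.uint8)
    white_ratio = patch.patch_bk_ratio(white_img, bk_thresh=0.80)
    black_ratio = patch.patch_bk_ratio(black_img, bk_thresh=0.80)
    for ratio in (white_ratio, black_ratio):
        if ratio > 1 or ratio < 0:
            raise AssertionError("Ratio not in the [0, 1] range.")
    if white_ratio < black_ratio:
        raise AssertionError("White patch should look more like background.")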

def gen_wsi_feas(patch_model, img_dir, fea_dir, args):
    img_list = [ele for ele in os.listdir(img_dir) if "jpg" in ele]
    for img_ind, ele in enumerate(img_list):
        img_name = os.path.splitext(ele)[0]
        if img_ind > 0 and img_ind % 10 == 0:
            print("processing {:03d}/{:03d}, {}".format(img_ind, len(img_list), img_name))
        feas_list, probs_list, coor_list = [], [], []
        img_path = os.path.join(img_dir, ele)
        cur_img = io.imread(img_path)
        # split coors and save patches
        coors_arr = wsi_stride_splitting(cur_img.shape[0], cur_img.shape[1], args.patch_len, args.stride_len)
        patch_list = []
        for coor_ind, coor in enumerate(coors_arr):
            start_h, start_w = coor[0], coor[1]
            patch_img = cur_img[start_h:start_h+args.patch_len, start_w:start_w+args.patch_len]
            # image background control
            if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) <= 0.88:
                patch_list.append(patch_img)
                coor_list.append([start_h, start_w, start_h+args.patch_len, start_w+args.patch_len])
            # extract features batch-wise to avoid huge memory consumption
            if len(patch_list) == 16 or (coor_ind + 1 == len(coors_arr) and len(patch_list) > 0):
                patch_arr = np.asarray(patch_list)
                patch_dset = PatchDataset(patch_arr)
                patch_loader = DataLoader(patch_dset, batch_size=16, shuffle=False,
                                          num_workers=4, drop_last=False)
                with torch.no_grad():
                    for inputs in patch_loader:
                        batch_tensor = Variable(inputs.cuda())
                        feas, probs = extract_model_feas(patch_model, batch_tensor, args)
                        batch_feas = feas.cpu().data.numpy().tolist()
                        batch_probs = probs.cpu().data.numpy().tolist()
                        feas_list.extend(batch_feas)
                        probs_list.extend(batch_probs)
                patch_list = []

        if len(feas_list) != len(probs_list) or len(feas_list) != len(coor_list):
            print("{} feas/probs/coors not consistent.".format(img_name))
            continue
        all_feas = np.asarray(feas_list).astype(np.float32)
        all_probs = np.asarray(probs_list).astype(np.float32)
        all_coors = np.asarray(coor_list).astype(np.float32)
        # sort patches by the probability of the first class
        sorted_ind = np.argsort(all_probs[:, 0])
        patch_fea_dict = {
            "feas": all_feas[sorted_ind],
            "probs": all_probs[sorted_ind],
            "coors": all_coors[sorted_ind],
        }
        if not os.path.exists(fea_dir):
            os.makedirs(fea_dir)
        dd.io.save(os.path.join(fea_dir, img_name + ".h5"), patch_fea_dict)
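
# The sketch below is a hypothetical driver for gen_wsi_feas above, not part of
# the original repo: the directory paths and patch/stride sizes are placeholder
# values, and extract_model_feas may require more fields on the args namespace
# than the two shown here.
def extract_all_wsi_feas_example(patch_model):
    from types import SimpleNamespace
    fea_args = SimpleNamespace(patch_len=448, stride_len=224)  # hypothetical sizes
    img_dir = "./data/SlideImages"   # hypothetical directory of *.jpg slide images
    fea_dir = "./data/SlideFeas"     # features are written here as <img_name>.h5
    gen_wsi_feas(patch_model, img_dir, fea_dir, fea_args)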

def gen_wsi_feas(patch_model, img_path, args):
    img_name = os.path.splitext(img_path)[0]
    feas_list, probs_list, coor_list = [], [], []
    cur_img = io.imread(img_path)
    # split coors and save patches
    coors_arr = wsi_stride_splitting(cur_img.shape[0], cur_img.shape[1], args.patch_len, args.stride_len)
    patch_list = []
    for ind, coor in enumerate(coors_arr):
        start_h, start_w = coor[0], coor[1]
        patch_img = cur_img[start_h:start_h+args.patch_len, start_w:start_w+args.patch_len]
        # image background control
        if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) <= 0.88:
            patch_list.append(patch_img)
            coor_list.append([start_h, start_w, start_h+args.patch_len, start_w+args.patch_len])
        # extract features batch-wise to avoid huge memory consumption
        if len(patch_list) == args.cls_batch_size or (ind + 1 == len(coors_arr) and len(patch_list) > 0):
            patch_arr = np.asarray(patch_list)
            patch_dset = ClsPatchDataset(patch_arr)
            patch_loader = DataLoader(patch_dset, batch_size=args.cls_batch_size, shuffle=False,
                                      num_workers=0, drop_last=False)
            with torch.no_grad():
                for inputs in patch_loader:
                    batch_tensor = Variable(inputs.cuda())
                    feas, probs = extract_model_feas(patch_model, batch_tensor, args)
                    batch_feas = feas.cpu().data.numpy().tolist()
                    batch_probs = probs.cpu().data.numpy().tolist()
                    feas_list.extend(batch_feas)
                    probs_list.extend(batch_probs)
            patch_list = []

    all_feas = np.asarray(feas_list).astype(np.float32)
    all_probs = np.asarray(probs_list).astype(np.float32)
    # rank patches by the probability of the first class and keep the top ones,
    # zero-padding the features up to a fixed number of patches per slide
    sorted_ind = np.argsort(all_probs[:, 0])
    feas_placeholder = np.zeros((args.wsi_patch_num, all_feas.shape[1]), dtype=np.float32)
    test_patch_num = min(len(all_feas), args.wsi_patch_num)
    chosen_total_ind = sorted_ind[:test_patch_num]
    feas_placeholder[:test_patch_num] = all_feas[chosen_total_ind]
    chosen_coors = np.asarray(coor_list)[chosen_total_ind].tolist()

    return feas_placeholder, test_patch_num, chosen_coors
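
# A hypothetical consumer sketch for the single-slide gen_wsi_feas above (not in
# the original repo): it stacks the fixed-size feature placeholders of several
# slides and builds a boolean mask marking the zero-padded rows, which a
# downstream slide-level aggregator could ignore. img_paths and args are
# supplied by the caller exactly as for gen_wsi_feas.
def collect_slide_feas_example(patch_model, img_paths, args):
    fea_batch, mask_batch = [], []
    for img_path in img_paths:
        slide_feas, valid_num, _ = gen_wsi_feas(patch_model, img_path, args)
        valid_mask = np.zeros((slide_feas.shape[0],), dtype=bool)
        valid_mask[:valid_num] = True  # rows beyond valid_num are zero padding
        fea_batch.append(slide_feas)
        mask_batch.append(valid_mask)
    return np.stack(fea_batch), np.stack(mask_batch)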

def gen_patches(img_dir, patch_dir, patch_size=448):
    img_list = pydaily.filesystem.find_ext_files(img_dir, "jpg")
    img_list = [os.path.basename(ele) for ele in img_list]
    pos_patch_dir = os.path.join(patch_dir, "1Pos")
    if not os.path.exists(pos_patch_dir):
        os.makedirs(pos_patch_dir)
    neg_patch_dir = os.path.join(patch_dir, "0Neg")
    if not os.path.exists(neg_patch_dir):
        os.makedirs(neg_patch_dir)

    pos_num, neg_num = 0, 0
    for ind, ele in enumerate(img_list):
        if ind > 0 and ind % 10 == 0:
            print("processing {}/{}".format(ind, len(img_list)))
        img_path = os.path.join(img_dir, ele)
        mask_path = os.path.join(img_dir, os.path.splitext(ele)[0] + ".png")
        cur_img = io.imread(img_path)
        cur_mask = io.imread(mask_path)
        # split coors and save patches
        coors_arr = patch.wsi_coor_splitting(cur_img.shape[0], cur_img.shape[1], patch_size, overlap_flag=True)
        for coor in coors_arr:
            start_h, start_w = coor[0], coor[1]
            patch_img = cur_img[start_h:start_h+patch_size, start_w:start_w+patch_size]
            # image background control
            if patch.patch_bk_ratio(patch_img, bk_thresh=0.864) > 0.88:
                continue
            # mask control: a patch is positive if at least 5% of its pixels are tumor
            patch_mask = cur_mask[start_h:start_h+patch_size, start_w:start_w+patch_size]
            pixel_ratio = np.sum(patch_mask > 0) * 1.0 / patch_mask.size
            patch_name = str(uuid.uuid4())[:8]
            if pixel_ratio >= 0.05:
                io.imsave(os.path.join(pos_patch_dir, patch_name + ".png"), patch_img)
                pos_num += 1
            else:
                # randomly drop 80% of negative patches from negative slides to balance classes
                if np.random.random_sample() <= 0.80 and "neg" in img_dir:
                    continue
                io.imsave(os.path.join(neg_patch_dir, patch_name + ".png"), patch_img)
                neg_num += 1
    print("There are {} pos samples and {} neg samples".format(pos_num, neg_num))
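
# A hypothetical invocation sketch for gen_patches (not in the original repo);
# both directories are placeholders. It assumes img_dir holds paired
# <name>.jpg images and <name>.png masks, as gen_patches expects.
def gen_patches_example():
    gen_patches(img_dir="./data/TrainImages",
                patch_dir="./data/TrainPatches",
                patch_size=448)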

def gen_samples(slides_dir, patch_level, patch_size, tumor_type, slide_list, dset, overlap_mode):
    # prepare saving directories
    patch_path = os.path.join(os.path.dirname(slides_dir), "Patches", tumor_type)
    patch_img_dir = os.path.join(patch_path, dset, "imgs")
    if not os.path.exists(patch_img_dir):
        os.makedirs(patch_img_dir)
    patch_mask_dir = os.path.join(patch_path, dset, "masks")
    if not os.path.exists(patch_mask_dir):
        os.makedirs(patch_mask_dir)

    # process slides one-by-one
    ttl_patch = 0
    slide_list.sort()
    for ind, ele in enumerate(slide_list):
        print("Processing {} {}/{}".format(ele, ind+1, len(slide_list)))
        cur_slide_path = os.path.join(slides_dir, ele+".svs")
        # locate tissue contours and generate patches based on them
        cnts, d_factor = tl.locate_tissue_cnts(cur_slide_path, max_img_size=2048, smooth_sigma=13,
                                               thresh_val=0.88, min_tissue_size=120000)
        select_level, select_factor = tl.select_slide_level(cur_slide_path, max_size=2048)
        cnts = sorted(cnts, key=lambda x: cv2.contourArea(x), reverse=True)

        # scale the largest tissue contour from the selection level to the patch level
        wsi_head = pyramid.load_wsi_head(cur_slide_path)
        cnt_scale = select_factor / int(wsi_head.level_downsamples[patch_level])
        tissue_arr = cv_cnt_to_np_arr(cnts[0] * cnt_scale).astype(np.int32)
        # fall back to the convex hull if the tissue polygon is not valid
        tissue_poly = np_arr_to_poly(tissue_arr)
        if not tissue_poly.is_valid:
            tissue_arr = poly_to_np_arr(tissue_poly.convex_hull).astype(int)

        if overlap_mode == "half_overlap":
            level_w, level_h = wsi_head.level_dimensions[patch_level]
            coors_arr = contour.contour_patch_splitting_half_overlap(tissue_arr, level_h, level_w,
                                                                     patch_size, inside_ratio=0.80)
        elif overlap_mode == "self_overlap":
            coors_arr = contour.contour_patch_splitting_self_overlap(tissue_arr, patch_size, inside_ratio=0.80)
        else:
            raise NotImplementedError("unknown overlapping mode")

        wsi_img = wsi_head.read_region((0, 0), patch_level, wsi_head.level_dimensions[patch_level])
        wsi_img = np.asarray(wsi_img)[:, :, :3]
        mask_path = os.path.join(slides_dir, "_".join([ele, tumor_type+".tif"]))
        mask_img = io.imread(mask_path)
        # resize the mask to the patch level, then binarize it to 0/255
        wsi_mask = transform.resize(mask_img, wsi_img.shape[:2], order=0, preserve_range=True)
        wsi_mask = (wsi_mask > 0).astype(np.uint8) * 255

        if dset == "val":
            # keep a copy of validation slides and masks for later testing
            test_slides_dir = os.path.join(os.path.dirname(slides_dir), "TestSlides")
            if not os.path.exists(test_slides_dir):
                os.makedirs(test_slides_dir)
            if not os.path.exists(os.path.join(test_slides_dir, os.path.basename(cur_slide_path))):
                shutil.copy(cur_slide_path, test_slides_dir)
            if not os.path.exists(os.path.join(test_slides_dir, os.path.basename(mask_path))):
                shutil.copy(mask_path, test_slides_dir)

        for cur_arr in coors_arr:
            cur_h, cur_w = cur_arr[0], cur_arr[1]
            cur_patch = wsi_img[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            if cur_patch.shape[0] != patch_size or cur_patch.shape[1] != patch_size:
                continue
            cur_mask = wsi_mask[cur_h:cur_h+patch_size, cur_w:cur_w+patch_size]
            # background RGB (235, 210, 235) * [0.299, 0.587, 0.114]
            if patch.patch_bk_ratio(cur_patch, bk_thresh=0.864) > 0.88:
                continue
            if overlap_mode == "half_overlap" and tumor_type == "viable":
                pixel_ratio = np.sum(cur_mask > 0) * 1.0 / cur_mask.size
                if pixel_ratio < 0.05:
                    continue
            patch_name = ele + "_" + str(uuid.uuid1())[:8]
            io.imsave(os.path.join(patch_img_dir, patch_name+".jpg"), cur_patch)
            io.imsave(os.path.join(patch_mask_dir, patch_name+".png"), cur_mask)
            ttl_patch += 1
    print("There are {} patches in total.".format(ttl_patch))
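
# A hypothetical driver sketch for gen_samples (not in the original repo): the
# slide ids, directory layout, patch level/size, and the "whole" tumor type are
# placeholders; "viable" and the two overlap_mode values come from the function
# above.
def gen_samples_example():
    slides_dir = "./data/SourceData"           # expects <id>.svs and <id>_<tumor_type>.tif
    train_list = ["01_01_0083", "01_01_0085"]  # hypothetical slide ids
    val_list = ["01_01_0091"]
    for tumor_type in ("viable", "whole"):
        gen_samples(slides_dir, patch_level=2, patch_size=512, tumor_type=tumor_type,
                    slide_list=train_list, dset="train", overlap_mode="half_overlap")
        gen_samples(slides_dir, patch_level=2, patch_size=512, tumor_type=tumor_type,
                    slide_list=val_list, dset="val", overlap_mode="self_overlap")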