def get_bounding_box_around_mask(self, color):
    """Return the bounding box around all annotation vertices of one colour.

    Vertex coordinates are read from the regions returned by
    ``self.pxml.get_all_regions(color)``, truncated to integers and then
    scaled by ``self.mag / max_mag``, where ``max_mag`` is the slide's
    ``openslide.objective-power`` property.

    Parameters
    ----------
    color
        Passed unchanged to ``self.pxml.get_all_regions`` to select regions.

    Returns
    -------
    tuple of int
        ``(min_x, min_y, max_x, max_y)`` after scaling, truncated to int.

    Raises
    ------
    ValueError
        If the selected regions contain no vertices.
    """
    # Only the objective power is needed from the slide, so release the
    # handle as soon as it has been read.
    slide = ops.open_slide(self.svsPath)
    try:
        max_mag = int(slide.properties['openslide.objective-power'])
    finally:
        slide.close()
    resize_factor = float(self.mag / max_mag)

    # Collect vertices in plain lists; growing a numpy array with np.append
    # copies the whole array on every vertex.
    xs = []
    ys = []
    for each_region in self.pxml.get_all_regions(color):
        for each_vertex in each_region.find_all('vertex'):
            xs.append(int(float(each_vertex.get('x'))))
            ys.append(int(float(each_vertex.get('y'))))

    if not xs:
        raise ValueError(
            'no annotation vertices found for color {!r}'.format(color))

    # apply resize factor
    all_x = np.array(xs, dtype=float) * resize_factor
    all_y = np.array(ys, dtype=float) * resize_factor

    min_x = int(np.min(all_x))
    max_x = int(np.max(all_x))
    min_y = int(np.min(all_y))
    max_y = int(np.max(all_y))
    return (min_x, min_y, max_x, max_y)
def extract_roi(self):
    """Assemble the region of interest from Deep Zoom tiles.

    The tile range comes from ``self.find_tile_in_dz()`` and the Deep Zoom
    level from ``self.dict_mag_level[self.mag]``. Tiles are pasted into a
    zero-initialised ``uint8`` canvas of ``patch_size``-sized cells.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(rows * patch_size, cols * patch_size, 3)``.
    """
    # get useful information
    start_col, start_row, end_col, end_row = self.find_tile_in_dz()
    slide = ops.open_slide(self.svsPath)
    try:
        dz = deepzoom.DeepZoomGenerator(slide, tile_size=self.patch_size,
                                        overlap=0, limit_bounds=True)
        level = self.dict_mag_level[self.mag]
        print("level: ", level)

        # initialize empty array the size of the ROI we need
        w = (end_col - start_col) * self.patch_size
        h = (end_row - start_row) * self.patch_size
        roi = np.zeros((h, w, 3), dtype='uint8')

        # Tiles are addressed as (column, row) but written as [row, column].
        for i in range(start_col, end_col):
            x_start = (i - start_col) * self.patch_size
            for j in range(start_row, end_row):
                y_start = (j - start_row) * self.patch_size
                tile = np.asarray(dz.get_tile(level, (i, j)))
                # Tiles on the right/bottom border can be smaller than
                # patch_size; paste only the pixels actually returned and
                # leave the remainder of the cell at zero.
                tile_h, tile_w = tile.shape[:2]
                roi[y_start:y_start + tile_h,
                    x_start:x_start + tile_w, :] = tile
    finally:
        slide.close()
    return roi
def get_slide_tile_information(self):
    """Return tile counts and pixel size of the level matching ``self.mag``.

    Returns
    -------
    tuple
        ``(n, m, w, h)`` where ``(n, m)`` is ``dz.level_tiles[level]`` and
        ``(w, h)`` is ``dz.level_dimensions[level]``.
    """
    slide = ops.open_slide(self.svsPath)
    try:
        dz = deepzoom.DeepZoomGenerator(slide, tile_size=self.patch_size,
                                        overlap=0, limit_bounds=True)
        # The level is looked up with the instance magnification, as the
        # sibling methods do; the bare name ``mag`` is not defined here.
        level = self.dict_mag_level[self.mag]
        w, h = dz.level_dimensions[level]
        n, m = dz.level_tiles[level]
    finally:
        slide.close()
    return (n, m, w, h)
def svs2dask_array(svs_file, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False):
    """Convert SVS, TIF or TIFF to dask array.

    Parameters
    ----------
    svs_file:str
        Image file.
    tile_size:int
        Size of chunk to be read in.
    overlap:int
        Do not modify, overlap between neighboring tiles.
    remove_last:bool
        Remove last tile because it has a custom size.
    allow_unknown_chunksizes: bool
        Allow different chunk sizes, more flexible, but slowdown.

    Returns
    -------
    dask.array
        Dask Array.

    >>> arr=svs2dask_array(svs_file, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False)
    >>> arr2=arr.compute()
    >>> arr3=to_pil(cv2.resize(arr2, dsize=(1440,700), interpolation=cv2.INTER_CUBIC))
    >>> arr3.save(test_image_name)"""
    slide = openslide.open_slide(svs_file)
    generator = deepzoom.DeepZoomGenerator(slide, tile_size=tile_size,
                                           overlap=overlap, limit_bounds=True)
    top_level = len(generator.level_dimensions) - 1
    n_tiles_x, n_tiles_y = generator.level_tiles[top_level]

    def read_tile(i, j):
        # Tiles are transposed so that axis 0 runs along the tile column index.
        return np.array(generator.get_tile(top_level, (i, j))).transpose((1, 0, 2))

    # The first tile is read eagerly; its shape is declared for every chunk.
    tile_shape = read_tile(0, 0).shape
    lazy_read_tile = dask.delayed(read_tile, pure=True)

    # Optionally skip the last row and column of tiles.
    trim = 1 if remove_last else 0
    columns = []
    for i in range(n_tiles_x - trim):
        column_tiles = [
            da.from_delayed(lazy_read_tile(i, j), tile_shape, np.uint8)
            for j in range(n_tiles_y - trim)
        ]
        columns.append(
            da.concatenate(column_tiles,
                           allow_unknown_chunksizes=allow_unknown_chunksizes,
                           axis=1))
    return da.concatenate(columns,
                          allow_unknown_chunksizes=allow_unknown_chunksizes)
def buildDictionary(self):
    """Map magnification values to Deep Zoom level indices.

    The highest level index is keyed by the slide's
    ``openslide.objective-power``; every lower level is keyed by half the
    magnification of the level above it.

    Returns
    -------
    dict
        ``{max_mag / 2**k: level}`` for ``k`` counting down from the
        highest level. Keys are floats because of true division.
    """
    slide = ops.open_slide(self.svsPath)
    try:
        dz = deepzoom.DeepZoomGenerator(slide, tile_size=self.patch_size,
                                        overlap=0, limit_bounds=True)
        levels = dz.level_count
        max_mag = int(slide.properties['openslide.objective-power'])
    finally:
        # Only metadata is needed; do not keep the slide handle open.
        slide.close()

    return {
        max_mag / (2 ** step): level
        for step, level in enumerate(reversed(range(levels)))
    }
def svs2dask_array(svs_file, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False, transpose=False):
    """Convert SVS, TIF or TIFF to dask array.

    Parameters
    ----------
    svs_file : str
        Image file.
    tile_size : int
        Size of chunk to be read in.
    overlap : int
        Do not modify, overlap between neighboring tiles.
    remove_last : bool
        Remove last tile because it has a custom size.
    allow_unknown_chunksizes : bool
        Allow different chunk sizes, more flexible, but slowdown.
    transpose : bool
        If true, swap the first two axes of the assembled array.

    Returns
    -------
    arr : dask.array.Array
        A Dask Array representing the contents of the image file.

    >>> arr = svs2dask_array(svs_file, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False)
    >>> arr2 = arr.compute()
    >>> arr3 = to_pil(cv2.resize(arr2, dsize=(1440, 700), interpolation=cv2.INTER_CUBIC))
    >>> arr3.save(test_image_name)
    """
    # https://github.com/jlevy44/PathFlowAI/blob/master/pathflowai/utils.py
    img = openslide.open_slide(svs_file)
    if type(img) is not openslide.OpenSlide:
        # img is instance of openslide.ImageSlide
        return dask_image.imread.imread(svs_file)

    gen = deepzoom.DeepZoomGenerator(
        img, tile_size=tile_size, overlap=overlap, limit_bounds=True)
    max_level = len(gen.level_dimensions) - 1
    n_tiles_x, n_tiles_y = gen.level_tiles[max_level]

    @dask.delayed(pure=True)
    def get_tile(level, column, row):
        return np.array(gen.get_tile(level, (column, row))).transpose((1, 0, 2))

    # Computing the delayed ``.shape`` reads tile (0, 0) once, up front.
    sample_tile_shape = get_tile(max_level, 0, 0).shape.compute()

    skipped = 1 if remove_last else 0
    strips = []
    for col in range(n_tiles_x - skipped):
        strip = [
            da.from_delayed(get_tile(max_level, col, row),
                            sample_tile_shape, np.uint8)
            for row in range(n_tiles_y - skipped)
        ]
        strips.append(
            da.concatenate(strip,
                           allow_unknown_chunksizes=allow_unknown_chunksizes,
                           axis=1))
    arr = da.concatenate(strips,
                         allow_unknown_chunksizes=allow_unknown_chunksizes)
    if transpose:
        arr = arr.transpose([1, 0, 2])
    return arr
def get_preview(slidename, classifier, level, size, slide_folder, neg=0):
    """Build a per-tile label grid for one slide.

    Arguments:
        - slidename: str, slide file name or path; only the base name is used
        - classifier: iterable of rows; ``row[1]`` and ``row[2]`` are used as
          grid indices and ``row[4]``, ``row[5]`` as labels
          (NOTE(review): column meaning is not visible here, confirm with
          the producer of these rows)
        - level: int, Deep Zoom level whose tile grid sizes the preview
        - size: int, patch side; the generator uses ``size - 2`` with
          overlap 1
        - slide_folder: str, folder containing the slide
        - neg: value that marks a negative label. Default = 0

    Returns:
        - list ``[slidename, preview]`` where preview is a float array of
          shape ``level_tiles[level]``: 1 where both labels equal ``neg``,
          otherwise ``row[4] + 2``; cells with no row stay 0
    """
    slidename = os.path.basename(slidename)
    slidepath = os.path.join(slide_folder, slidename)

    # Only the tile grid size is needed, so close the slide right away.
    slide = OpenSlide(slidepath)
    try:
        slide_dz = deepzoom.DeepZoomGenerator(
            slide, tile_size=(size - 2), overlap=1)
        tiles = slide_dz.level_tiles[level]
    finally:
        slide.close()

    preview = numpy.zeros(tiles)
    for x in classifier:
        im_x = int(x[1])
        im_y = int(x[2])
        if x[4] == neg and x[5] == neg:
            preview[im_x, im_y] = 1
        else:
            preview[im_x, im_y] = x[4] + 2
    return [slidename, preview]
def get_probability_images(slidename, classifier, slide_folder, features=30,
                           level=16, size=224):
    """Save a per-tile feature grid for one slide as a ``.npy`` file.

    Arguments:
        - slidename: str, slide file name inside ``slide_folder``
        - classifier: iterable of rows; ``row[1]`` and ``row[2]`` are used as
          grid indices, ``row[3]`` as a flag and ``row[4:4 + features]`` as
          feature values (NOTE(review): column meaning is not visible here)
        - slide_folder: str, folder containing the slide
        - features: int, number of feature values per row. Default = 30
        - level: int, Deep Zoom level whose tile grid sizes the output.
          Default = 16
        - size: int, patch side; the generator uses ``size - 2`` with
          overlap 1. Default = 224

    Returns:
        - image_name: str, path of the saved array. The array has shape
          ``(tiles_x, tiles_y, features + 1)``; channel 0 is set to 1 when
          ``row[3] == 0``, otherwise channels ``1..features`` receive the
          feature values

    Note: the output folder is read from the module-level name ``outpath``;
    it is not a parameter of this function.
    """
    slidepath = os.path.join(slide_folder, slidename)

    # Only the tile grid size is needed, so close the slide right away.
    slide = OpenSlide(slidepath)
    try:
        slide_dz = deepzoom.DeepZoomGenerator(
            slide, tile_size=(size - 2), overlap=1)
        tiles = slide_dz.level_tiles[level]
    finally:
        slide.close()

    preview = numpy.zeros((tiles[0], tiles[1], features + 1))
    for x in classifier:
        im_x = int(x[1])
        im_y = int(x[2])
        if x[3] == 0:
            preview[im_x, im_y, 0] = 1
        else:
            for f in range(features):
                preview[im_x, im_y, f + 1] = x[4 + f]

    image_name = os.path.join(
        outpath, '{}#{}features.npy'.format(slidename, features))
    numpy.save(image_name, preview)
    return image_name
def get(self, path):
    """Return the Deep Zoom generator for ``path``, using the LRU cache.

    A cached generator is moved to the most-recently-used end and returned.
    Otherwise the slide is opened outside the lock, a generator is built
    with ``self.dz_opts``, its ``mpp`` attribute is set to the mean of the
    slide's MPP-X and MPP-Y properties (0 when missing or not numeric), and
    it is stored in the cache unless an entry for ``path`` already exists,
    evicting the oldest entry when the cache holds ``self.cache_size``
    items.
    """
    with self._lock:
        if path in self._cache:
            # Refresh the entry's position in the LRU order.
            self._cache.move_to_end(path)
            return self._cache[path]

    osr = openslide.OpenSlide(path)
    slide = deepzoom.DeepZoomGenerator(osr, **self.dz_opts)
    try:
        mpp_x = float(osr.properties[openslide.PROPERTY_NAME_MPP_X])
        mpp_y = float(osr.properties[openslide.PROPERTY_NAME_MPP_Y])
    except (KeyError, ValueError):
        slide.mpp = 0
    else:
        slide.mpp = (mpp_x + mpp_y) / 2

    with self._lock:
        already_cached = path in self._cache
        if not already_cached:
            if len(self._cache) == self.cache_size:
                self._cache.popitem(last=False)
            self._cache[path] = slide
    return slide
def _read_level_tiles(slide_path, tile_size, overlap):
    """Read the interior tiles of Deep Zoom level ``level_count - 3``.

    Returns a dict mapping a running index (row-major over the visited
    grid) to the tile as a numpy array. The last row and column, plus
    ``offset`` tiles on every side, are skipped.
    """
    slide = openslide.OpenSlide(slide_path)
    try:
        dpz = deepzoom.DeepZoomGenerator(slide,
                                         tile_size=tile_size,
                                         overlap=overlap,
                                         limit_bounds=False)
        dz_level = dpz.level_count - 3
        n_cols, n_rows = dpz.level_tiles[dz_level]
        offset = int(np.ceil(overlap / tile_size))

        tiles = {}
        count = 0
        for j in range(offset, n_rows - 1 - offset):
            for k in range(offset, n_cols - 1 - offset):
                tiles[count] = np.asarray(dpz.get_tile(dz_level, (k, j)))
                count += 1
    finally:
        # Release the handle even if a tile read fails.
        slide.close()
    return tiles


def extract_patches(img_id, msk_id, level=1, sz=128):
    """
    This function splits each slide into patches and saves them to disk.

    Each patch is saved based on its class (stroma, benign, pattern 3, 4,
    and 5). Patches from the same slide go under the same slide id folder.
    A csv file containing each patch's filename and class is generated.
    A csv file containing slide IDs of extracted patches is also generated.

    Arguments:
        img_id: sequence of slide file names, appended to the module-level
            ``images_path``.
        msk_id: sequence of mask file names, appended to the module-level
            ``masks_path``; ``msk_id[i]`` must belong to ``img_id[i]``.
        level: accepted for compatibility but not used; tiles are always
            read from Deep Zoom level ``level_count - 3``.
        sz: requested patch side; the Deep Zoom overlap is derived from it
            as ``int((sz - 126) / 2)``.
    """
    patches = []  # Store patch filename and class

    # Here 128 doesn't work for some reasons, so 126 is chosen. No overlapping
    # between patches. Lowering this number generates patches with
    # overlapping areas.
    tile_size = 126
    overlap = int((sz - tile_size) / 2)  # Size of overlapping areas between patches

    # Output csv path for patch filename and class
    csv_path = "./prostate-cancer-grade-assessment/patches_level1_" + str(
        sz) + ".csv"
    # Root folder for all saved patches. Defined up front so that it exists
    # as a name even when no patch is saved.
    out_root = "./prostate-cancer-grade-assessment/patches_level1_" + \
        str(sz) + "/"

    for i in trange(len(img_id)):
        # Read slide tiles, then mask tiles, with the same grid settings so
        # that equal indices refer to the same location.
        tiles1 = _read_level_tiles(images_path + img_id[i], tile_size, overlap)
        tiles2 = _read_level_tiles(masks_path + msk_id[i], tile_size, overlap)

        if len(tiles1) > 0:
            # Rank slide tiles by counting color pixels on mask tiles
            ranked_masks = dict(
                sorted(tiles2.items(),
                       key=lambda item: item[1].sum(),
                       reverse=True))

            iteration = 0
            # Score mask tiles
            for tile_idx, mask_tile in ranked_masks.items():
                score, relative_score = grade(mask_tile)

                # Find class
                # Patches that contain more than 1 color are not used
                tile_score = -1
                if np.sum(score) == 1:
                    if np.sort(relative_score)[:3].sum() == 0:
                        if score[np.argmax(relative_score) + 1] > 0.4:
                            tile_score = np.argmax(relative_score) + 2
                        elif score[0] > 0.7:
                            tile_score = 1

                if tile_score >= 1:
                    if if_clean(tiles1[tile_idx]):
                        # Save patches
                        patch_img = Image.fromarray(tiles1[tile_idx])
                        slide_dir = img_id[i].split(".")[0] + "/"
                        class_dir = str(tile_score) + "/"
                        os.makedirs(out_root + slide_dir + class_dir,
                                    exist_ok=True)

                        # Filename example: /#slideID/#classNumber/#patchFilename.tiff
                        patch_img.save(out_root + slide_dir + class_dir +
                                       str(iteration) + img_id[i])

                        # Append patch filename and class to csv
                        patches.append(
                            [str(iteration) + img_id[i],
                             str(tile_score)])
                        iteration += 1

    # Save csv file containing patch filename and class
    np.savetxt(csv_path, patches, delimiter=",", fmt="%s")

    # Save csv file containing slide IDs of extracted patches.
    # os.listdir returns bare names, so each one must be joined with the
    # root folder before testing whether it is a directory.
    csv_content = []
    if os.path.isdir(out_root):
        csv_content = [
            folder for folder in os.listdir(out_root)
            if os.path.isdir(os.path.join(out_root, folder))
        ]
    np.savetxt(patches_level1_128_slideFolders_path,
               csv_content,
               delimiter=",",
               fmt="%s")
def __create_tiles(self, mask, bg_color):
    """Create tiles given a PySlide and a mask.

    The slide and the mask are each wrapped in a Deep Zoom generator and
    walked tile by tile, columns first and then rows. For every grid
    position the mask tile is passed to ``utility_functions.selector`` and
    the result is stored in ``preds``. Depending on the flags on
    ``self.input_slide`` the slide tile is saved, a cross is drawn on a
    downsampled overview image, and a ``tile_selection.tsv`` table is
    written. The method has no return statement.

    Arguments:
        mask: PIL Image containing the mask for the slide.
        bg_color: Numpy array indicating the color used for the background
            in the mask.
    """
    ts = time.time()

    # Create folder for the patches
    if self.input_slide.save_patches:
        self.input_slide._create_tile_folder()

    # Initialize deep zoom generator for the slide
    image_dims = self.input_slide.slide.dimensions
    dzg = deepzoom.DeepZoomGenerator(self.input_slide.slide, tile_size=self.input_slide.patch_size, overlap=0)

    # Find the deep zoom level corresponding to the
    # requested downsampling factor.
    # dzg_levels lists powers of two from largest to smallest, so
    # .index() raises ValueError when output_downsample is not one of them.
    dzg_levels = [2**i for i in range(0, dzg.level_count)][::-1]
    dzg_selectedlevel_idx = dzg_levels.index(self.input_slide.output_downsample)
    dzg_selectedlevel_dims = dzg.level_dimensions[dzg_selectedlevel_idx]
    dzg_selectedlevel_maxtilecoords = dzg.level_tiles[dzg_selectedlevel_idx]
    dzg_real_downscaling = np.divide(image_dims, dzg.level_dimensions)[:, 0][dzg_selectedlevel_idx]
    n_tiles = np.prod(dzg_selectedlevel_maxtilecoords)
    digits_padding = len(str(n_tiles))

    # Calculate patch size in the mask
    mask_patch_size = int(np.ceil(self.input_slide.patch_size * (self.input_slide.output_downsample/self.input_slide.mask_downsample)))

    # Deep zoom generator for the mask
    dzgmask = deepzoom.DeepZoomGenerator(openslide.ImageSlide(mask), tile_size=mask_patch_size, overlap=0)
    dzgmask_dims = dzgmask.level_dimensions[dzgmask.level_count - 1]
    dzgmask_maxtilecoords = dzgmask.level_tiles[dzgmask.level_count - 1]
    dzgmask_ntiles = np.prod(dzgmask_maxtilecoords)

    # If needed, generate an image to store tile-crossed output, at the requested tilecross downsample level
    if self.input_slide.save_tilecrossed_image:
        # Get a downsampled numpy array for the image
        tilecrossed_img = utility_functions.downsample_image(self.input_slide.slide, self.input_slide.tilecross_downsample, mode="numpy")[0]

        # Calculate patch size in the tile-crossed image
        tilecross_patchsize = int(np.ceil(self.input_slide.patch_size * (self.input_slide.output_downsample/self.input_slide.tilecross_downsample)))

        # Draw the grid at the scaled patchsize
        x_shift, y_shift = tilecross_patchsize, tilecross_patchsize
        gcol = [255, 0, 0]
        tilecrossed_img[:, ::y_shift, :] = gcol
        tilecrossed_img[::x_shift, :, :] = gcol

        # Convert numpy array to PIL image
        tilecrossed_img = Image.fromarray(tilecrossed_img, mode="RGB")

        # Create object to draw the crosses for each tile
        draw = ImageDraw.Draw(tilecrossed_img)

        # Counters for iterating through the tile-crossed image tiles
        # (updated in the loop below but not read anywhere else in this method)
        tc_w = 0
        tc_h = 0

    # Debug information
    logging.debug("** Original image information **")
    logging.debug("-Dimensions: " + str(image_dims))
    logging.debug("** Mask information **")
    logging.debug("-Mask downscaling factor: " + str(self.input_slide.mask_downsample))
    logging.debug("-Pixel dimensions: " + str(dzgmask_dims))
    logging.debug("-Calculated patch size: " + str(mask_patch_size))
    logging.debug("-Max tile coordinates: " + str(dzgmask_maxtilecoords))
    logging.debug("-Number of tiles: " + str(dzgmask_ntiles))
    logging.debug("** Output image information **")
    logging.debug("Requested " + str(self.input_slide.output_downsample) + "x downsampling for output.")
    logging.debug("** Properties of selected deep zoom level **")
    logging.debug("-Real downscaling factor: " + str(dzg_real_downscaling))
    logging.debug("-Pixel dimensions: " + str(dzg_selectedlevel_dims))
    logging.debug("-Selected patch size: " + str(self.input_slide.patch_size))
    logging.debug("-Max tile coordinates: " + str(dzg_selectedlevel_maxtilecoords))
    logging.debug("-Number of tiles: " + str(n_tiles))
    logging.info("== Selecting tiles ==")

    # When the mask grid and the slide grid disagree, only the grid
    # positions present in both are visited.
    if dzgmask_maxtilecoords != dzg_selectedlevel_maxtilecoords:
        logging.info("Rounding error creates extra patches at the side(s) of the image.")
        grid_coord = (min(dzgmask_maxtilecoords[0], dzg_selectedlevel_maxtilecoords[0]), min(dzgmask_maxtilecoords[1], dzg_selectedlevel_maxtilecoords[1]))
        logging.info("Ignoring the image border. Maximum tile coordinates: " + str(grid_coord))
        n_tiles = grid_coord[0] * grid_coord[1]
    else:
        grid_coord = dzg_selectedlevel_maxtilecoords

    # Counters
    preds = [0] * n_tiles
    row, col, i = 0, 0, 0
    tile_names = []
    tile_dims_w = []
    tile_dims_h = []
    tile_rows = []
    tile_cols = []

    # Evaluate tiles using the selector function
    while row < grid_coord[1]:
        # Extract the tile from the mask (the last level is used
        # since the mask is already rescaled)
        mask_tile = dzgmask.get_tile(dzgmask.level_count - 1, (col, row))

        # Convert the PIL tile to a numpy array.
        # NOTE(review): an earlier comment called this "BGR"; no channel
        # reordering happens here, so confirm what the selector expects.
        mask_tile = np.array(mask_tile)

        # Predict if the tile will be kept (1) or not (0)
        preds[i] = utility_functions.selector(mask_tile, self.input_slide.thres, bg_color, self.input_slide.method)

        # Save patches if requested
        if self.input_slide.save_patches:
            tile = dzg.get_tile(dzg_selectedlevel_idx, (col, row))

            # If we need square patches only, we set the prediction to zero if the tile is not square
            if not self.input_slide.save_nonsquare:
                if tile.size[0] != tile.size[1]:
                    preds[i] = 0

            # Prepare metadata (one entry per visited tile, so index i
            # lines up with preds[i])
            tile_names.append(self.input_slide.sample_id + "_" + str(i).zfill(digits_padding))
            tile_dims_w.append(tile.size[0])
            tile_dims_h.append(tile.size[1])
            tile_rows.append(row)
            tile_cols.append(col)

            # Save tile: always when save_blank is set, otherwise only
            # when the tile was selected
            imgtile_out = self.input_slide.tile_folder + tile_names[i] + "." + self.input_slide.format
            if self.input_slide.save_blank:
                tile.save(imgtile_out)
            else:
                if preds[i] == 1:
                    tile.save(imgtile_out)

        # Draw cross over corresponding patch section on tilecrossed image
        if self.input_slide.save_tilecrossed_image:
            start_w = col * (tilecross_patchsize)
            start_h = row * (tilecross_patchsize)

            # If we reach the edge of the image, we only can draw until the edge pixel
            if (start_w + tilecross_patchsize) >= tilecrossed_img.size[0]:
                cl_w = tilecrossed_img.size[0] - start_w
            else:
                cl_w = tilecross_patchsize
            if (start_h + tilecross_patchsize) >= tilecrossed_img.size[1]:
                cl_h = tilecrossed_img.size[1] - start_h
            else:
                cl_h = tilecross_patchsize

            # Draw the cross only if the tile has to be kept
            if preds[i] == 1:
                # From top left to bottom right
                draw.line([(start_w, start_h), (start_w + cl_w, start_h + cl_h)], fill=(0, 0, 255), width=3)
                # From bottom left to top right
                draw.line([(start_w, start_h + cl_h), (start_w + cl_w, start_h)], fill=(0, 0, 255), width=3)

            # Jump to the next tilecross tile
            tc_w = tc_w + tilecross_patchsize + 1

        # Jump to the next column tile
        col += 1

        # If we reach the right edge of the image, jump to the next row
        if col == grid_coord[0]:
            col = 0
            row += 1
            if self.input_slide.save_tilecrossed_image:
                tc_w = 0
                tc_h = tc_h + tilecross_patchsize + 1

        # Increase counter for metadata
        i += 1

    # Saving tilecrossed image
    if self.input_slide.save_tilecrossed_image:
        tilecrossed_outpath = self.input_slide.img_outpath + "/tilecrossed_" + self.input_slide.sample_id + "." + self.input_slide.format
        tilecrossed_img.save(tilecrossed_outpath)

    # Save predictions for each tile
    # NOTE(review): this path is built without a "/" separator while the
    # tilecrossed path above adds one; confirm how img_outpath is formatted.
    if self.input_slide.save_patches:
        patch_results = []
        patch_results.extend(list(zip(tile_names, tile_dims_w, tile_dims_h, preds, tile_rows, tile_cols)))
        patch_results_df = pd.DataFrame.from_records(patch_results, columns=["Tile", "Width", "Height", "Keep", "Row", "Column"])
        patch_results_df.to_csv(self.input_slide.img_outpath + "tile_selection.tsv", index=False, sep="\t")

    # Finishing
    te = time.time()
    logging.debug("Elapsed time: " + str(round(te - ts, ndigits = 3)) + "s")
    if self.input_slide.save_blank:
        logging.debug("Selected " + str(len(preds)) + " tiles")
    else:
        logging.debug("Selected " + str(sum(preds)) + " tiles")
# Open the selected training slide and show the requested pyramid level.
slide_file = images_path + train_image_id[new_train_idx]
im = openslide.OpenSlide(slide_file)
width, height = im.level_dimensions[level][0], im.level_dimensions[level][1]
im2 = im.read_region(location=(0, 0), level=level, size=(width, height))

# Drop the 4th channel (alpha); only RGB is displayed.
data = np.array(im2)[:, :, 0:3]
ax[0].imshow(data)

# Split into patches using the Deep Zoom level three below the top.
dpz = deepzoom.DeepZoomGenerator(im,
                                 tile_size=tile_size,
                                 overlap=overlap,
                                 limit_bounds=False)
# From here on width/height hold the tile grid size of that level.
width = dpz.level_tiles[dpz.level_count - 3][0]
height = dpz.level_tiles[dpz.level_count - 3][1]
offset = int(np.ceil(overlap / tile_size))

# Collect tiles row by row, skipping ``offset`` tiles on every side.
tiles1 = []
for j in range(offset, height - offset):
    for i in range(offset, width - offset):
        patch = dpz.get_tile(dpz.level_count - 3, (i, j))
        tiles1.append(np.asarray(patch))
tiles1 = np.stack(tiles1, axis=0)
def predict_wsi(model, global_fixed, slide_path):
    """Predict a mask for a whole slide, one 1024x1024 tile at a time.

    Level 2 of the slide is read in full, written to ``/tmp/temp.jpg`` and
    reopened as an ``ImageSlide``. Each Deep Zoom tile of that image is
    filtered, passed through the nested ``predict`` helper and written into
    an output array, which is finally cropped to the level-2 size.

    Parameters
    ----------
    model
        Object providing ``module.collect_local_fm``, ``module.ensemble``
        and ``forward``.
    global_fixed
        Passed to ``collect_local_fm`` as ``global_model``.
    slide_path
        Path handed to ``OpenSlide``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(h, w)`` for the level-2 dimensions.

    Notes
    -----
    The temporary file path is fixed, so concurrent calls would overwrite
    each other's image. The helper moves tensors with ``.cuda()``.
    """
    # size_g, size_p = (244, 244), (244, 244)
    # size_g, size_p = (1008, 1008), (1008, 1008)
    # n_class = 2
    # sub_batch_size = 1
    def predict(image_as_tensor, size_g=(244, 244), size_p=(244, 244), n_class=2):
        # Returns one argmax-over-classes map per input image.
        # NOTE(review): despite the name, the caller passes a list of PIL
        # images; confirm what resize/global2patch expect.
        images_glb = resize(image_as_tensor, size_g)
        # One score accumulator per image, sized (1, n_class, height, width).
        scores = [
            np.zeros((1, n_class, image_as_tensor[i].size[1], image_as_tensor[i].size[0]))
            for i in range(len(image_as_tensor))
        ]
        images_glb = images_transform(images_glb)
        patches, coordinates, templates, sizes, ratios = global2patch(
            image_as_tensor, size_p)
        predicted_ensembles = [
            np.zeros((len(coordinates[i]), n_class, size_p[0], size_p[1]))
            for i in range(len(image_as_tensor))
        ]

        # First pass: feed every patch, one at a time, to collect_local_fm.
        # fm_patches is overwritten on each call; only the last return
        # value is used below.
        for i in range(len(image_as_tensor)):
            j = 0
            while j < len(coordinates[i]):
                patches_var = images_transform(patches[i][j:j + 1])  # b, c, h, w
                fm_patches, _ = model.module.collect_local_fm(
                    images_glb[i:i + 1], patches_var, ratios[i], coordinates[i], [j, j + 1], len(image_as_tensor), global_model=global_fixed, template=templates[i], n_patch_all=len(coordinates[i]), )
                j += 1

        _, fm_global = model.forward(images_glb, None, None, None, mode=PhaseMode.GlobalFromLocal)

        # Second pass: combine each local feature map with the matching
        # crop of the global feature map and accumulate the prediction.
        for i in range(len(image_as_tensor)):
            j = 0
            # while j < n ** 2:
            while j < len(coordinates[i]):
                fl = fm_patches[i][j:j + 1].cuda()
                fg = crop_global(fm_global[i:i + 1], coordinates[i][j:j + 1], ratios[i])[0]
                fg = F.interpolate(fg, size=fl.size()[2:], mode="bilinear")
                output_ensembles = model.module.ensemble(
                    fl, fg)  # include cordinates
                # output_ensembles = F.interpolate(model.module.ensemble(fl, fg), size_p, **model.module._up_kwargs)
                # ensemble predictions
                predicted_ensembles[i][j:j + output_ensembles.size()[0]] += (
                    F.interpolate(
                        output_ensembles, size=size_p, mode="nearest", ).data.cpu().numpy())
                j += 1
            # k=0 means rot90 performs no rotation here.
            scores[i] += np.rot90(
                np.array(
                    patch2global(
                        predicted_ensembles[i:i + 1], n_class, sizes[i:i + 1], coordinates[i:i + 1], size_p, )), k=0, axes=(3, 2), )
        return [score.argmax(1)[0] for score in scores]

    # Read pyramid level 2 in full and round-trip it through a JPEG file so
    # it can be reopened as a single-level ImageSlide.
    slide = OpenSlide(slide_path)
    w, h = slide.level_dimensions[2]
    img = slide.read_region((0, 0), 2, (w, h))
    slide.close()
    img.convert('RGB').save('/tmp/temp.jpg')
    slide = ImageSlide('/tmp/temp.jpg')
    dz = deepzoom.DeepZoomGenerator(slide, tile_size=1024, overlap=0)
    cols, rows = dz.level_tiles[-1]
    # Canvas sized to whole tiles; cropped to (h, w) on return.
    out = np.zeros((rows * 1024, cols * 1024), dtype=np.uint8)
    for row in range(rows):
        for col in range(cols):
            tile = dz.get_tile(dz.level_count - 1, (col, row))  # col, row
            # Element 0 is used as the tile's top-left corner and element 2
            # as its size — presumably the read_region arguments; confirm
            # against the OpenSlide documentation.
            tile_coors = dz.get_tile_coordinates(dz.level_count - 1, (col, row))
            left, top = tile_coors[0]
            t_w, t_h = tile_coors[2]
            # Border tiles are brought to the full 1024x1024 size first.
            if tile.size != (1024, 1024):
                tile = add_extra_pixels(tile, expected_shape=(1024, 1024))
            tile = np.array(tile)
            processed = apply_filters(tile)
            pred = predict([Image.fromarray(processed)])
            pil_pred = pred[0].astype(np.uint8)
            newmask = remove_mask_overlay_background(processed, pil_pred)
            # applied_mask = apply_mask(tile, newmask)
            applied_mask = newmask
            # Only the tile's real extent is copied back.
            out[top:top + t_h, left:left + t_w] = applied_mask[:t_h, :t_w]
    return out[:h, :w]
def makeSplitData(folderName, fileNameHere, dirName):
    """Tile one SVS slide and sort the tiles into ``TX`` / ``NT`` folders.

    The slide ``dirName + folderName + '/' + fileNameHere + '.svs'`` is cut
    into 360-pixel Deep Zoom tiles; the last row and column of the grid are
    skipped. Tiles are sent in batches of up to 256 through the module-level
    session (``sess``, ``cp``, ``cx``, ``train_mode``). A tile is saved under
    ``TX`` when the second value of its result exceeds 0.5, otherwise under
    ``NT``.

    The Deep Zoom level is chosen from the ``aperio.AppMag`` and
    ``aperio.MPP`` properties: one below the top for ``'40'`` or for
    ``'20'`` with an MPP starting with ``'0.2'``, the top level for any
    other ``'20'``.

    Returns
    -------
    int or None
        Number of tiles saved, or ``None`` when the magnification is
        neither 20 nor 40.
    """
    print(fileNameHere)
    splitNum = 0
    curFolder = dirName + folderName + '/'
    svsFile = fileNameHere + '.svs'

    slide = OpenSlide(curFolder + svsFile)
    try:
        magVal = slide.properties['aperio.AppMag']
        mppVal = slide.properties['aperio.MPP']
        zoomed = deepzoom.DeepZoomGenerator(slide, tile_size=360, overlap=0,
                                            limit_bounds=False)
        levelCount = int(zoomed.level_count)

        if magVal == '40' or (magVal == '20' and mppVal[:3] == '0.2'):
            targetLevel = levelCount - 2
        elif magVal == '20':
            targetLevel = levelCount - 1
        else:
            print('mag is not 20 or 40!!!')
            return
        totTiles = zoomed.level_tiles[targetLevel]

        # Grid position of the last visited tile; reaching it flushes the
        # final, possibly partial, batch.
        lastI = totTiles[0] - 2
        lastJ = totTiles[1] - 2

        train_batch = []
        train_tile = []
        train_ij = []
        for i in range(totTiles[0] - 1):
            for j in range(totTiles[1] - 1):
                if i % 10 == 0 and j % 10 == 0:
                    print('(' + str(i) + ',' + str(j) + ')')
                tile = zoomed.get_tile(targetLevel, (i, j))
                train_tile.append(tile)
                train_batch.append(np.array(tile))
                train_ij.append((i, j))

                if len(train_batch) == 256 or (i == lastI and j == lastJ):
                    result = sess.run((cp), feed_dict={
                        cx: train_batch,
                        train_mode: False
                    })
                    for k in range(len(result)):
                        subFolder = '/TX/' if result[k][1] > 0.5 else '/NT/'
                        train_tile[k].save(curFolder + subFolder +
                                           fileNameHere + '-' +
                                           str(train_ij[k][0]) + '-' +
                                           str(train_ij[k][1]) + '.jpg')
                        splitNum += 1
                    train_batch = []
                    train_ij = []
                    train_tile = []
    finally:
        # Release the slide on every exit path, including the early return.
        slide.close()
    return splitNum
def setup():
    """Open the bundled test slide and attach its Deep Zoom generator to ``app``."""
    test_slide_path = 'HCI/slide-webserver/slideserver/static/testslide.svs'
    # The slide stays open: the generator stored on ``app`` reads from it.
    app.deepzoom = deepzoom.DeepZoomGenerator(OpenSlide(test_slide_path))
def get_patches(slidepath, outpath, level=10, tissue_ratio=0.25, size=256):
    """
    Function that divides a slide into patches at one Deep Zoom level.
    The patches are saved inside a folder with the slide name, and have the
    format {slide_name}#{patch_number}-level{level}-{x}-{y}.jpg.
    ``slide_preview`` is also called for the slide before patching.

    Arguments:
        - slidepath: str, path to the image to patchify
        - outpath: str, path to the folder in which a new folder will be
          created, where the patches will be saved. This folder has the
          same name as the image to patchify
        - level: int, level in which image is patchified. The bigger the
          level, the higher the number of patches and the resolution of
          the images. Default = 10
        - tissue_ratio: float, minimum surface of tissue tile to be
          considered. Default = 0.25
        - size: int, side number of pixels (n pixels size*size).
          Default = 256

    Returns:
        - n: int, number of patches saved. 0 is returned when the level is
          invalid or the output folder already exists.
    """
    # Opens the slide with OpenSlide
    slide = OpenSlide(slidepath)
    try:
        # Gets deepzoom tile division
        slide_dz = deepzoom.DeepZoomGenerator(slide,
                                              tile_size=(size - 2),
                                              overlap=1)

        # Gets the name and number of the slide
        slidename = os.path.basename(slidepath)

        # Saves a preview of the slide
        slide_preview(slide, slidename, outpath)

        # Asures that the chosen level is valid. Negative values are
        # rejected too: they would index level_tiles from the end but are
        # not valid Deep Zoom levels.
        if not 0 <= level < slide_dz.level_count:
            print('Invalid level')
            return 0
        tiles = slide_dz.level_tiles[level]
        print('Level {} contains {} tiles (empty tiles included)'.format(
            level, tiles[0] * tiles[1]))

        # Creates new directory - where patches will be stored
        outpath = os.path.join(outpath, slidename)
        try:
            os.mkdir(outpath)
            print("Directory", outpath, "created")
        except FileExistsError:
            print("Directory", outpath, "already exists")
            print("Patches already extracted")
            return 0

        # Saves tiles if detects tissue presence higher than tissue_ratio
        n = 0
        print("Saving tiles image " + slidepath + "...")
        for i in tqdm(range(tiles[0])):
            for j in range(tiles[1]):
                # Gets the tile in position (i, j)
                tile = slide_dz.get_tile(level, (i, j))
                image = numpy.array(tile)[..., :3]
                mask = tissue.get_tissue_from_rgb(image,
                                                  blacktol=10,
                                                  whitetol=240)
                # Saves tile in outpath only if tissue ratio is higher
                # than threshold
                if mask.sum() > tissue_ratio * tile.size[0] * tile.size[1]:
                    tile_path = os.path.join(
                        outpath,
                        '{}#{}-level{}-{}-{}.jpg'.format(
                            slidename, n, level, i, j))
                    tile.save(tile_path)
                    n = n + 1
    finally:
        # Release the slide on every exit path.
        slide.close()

    print('Total of {} tiles with tissue ratio >{} in slide {}'.format(
        n, tissue_ratio, slidepath))
    print()
    return n
def file_to_dask_array(path, tile_size=1000, overlap=0, remove_last=True, allow_unknown_chunksizes=False):
    """Load an image to a dask array.

    ``.npy`` files are loaded with numpy and wrapped in a dask array. Files
    that OpenSlide opens as an ``OpenSlide`` object are read tile by tile
    from the highest Deep Zoom level. Anything else is handed to
    ``dask_image.imread``.

    Parameters
    ----------
    path : str
        The path to the image file as a string.
    tile_size : int, optional
        Size of chunk to be read in.
    overlap : int, optional
        Do not modify, overlap between neighboring tiles.
    remove_last : bool, optional
        Remove last tile because it has a custom size.
    allow_unknown_chunksizes : bool, optional
        Allow different chunk sizes, more flexible, but slowdown.

    Returns
    -------
    arr : dask.array.Array
        A Dask Array representing the contents of the image file.

    Examples
    --------
    >>> da_img = file_to_dask_array(path)
    >>> npa_img = da_img.compute()  # convert from dask array to numpy array
    >>> pil_img = to_pil(cv2.resize(
    ...     npa_img,
    ...     dsize=(1440, 700),
    ...     interpolation=cv2.INTER_CUBIC
    ... ))
    >>> pil_img.save(test_image_name)
    """
    if path.endswith('.npy'):
        import dask.array as da
        # The array must be returned; building it alone yields None.
        return da.from_array(np.load(path))

    import openslide
    img = openslide.open_slide(path)
    if not isinstance(img, openslide.OpenSlide):
        # img is instance of openslide.ImageSlide
        import dask_image.imread
        return dask_image.imread.imread(path)

    from openslide import deepzoom
    import dask.array as da
    import dask.delayed

    gen = deepzoom.DeepZoomGenerator(img,
                                     tile_size=tile_size,
                                     overlap=overlap,
                                     limit_bounds=True)
    max_level = len(gen.level_dimensions) - 1
    n_tiles_x, n_tiles_y = gen.level_tiles[max_level]

    @dask.delayed(pure=True)
    def get_tile(level, column, row):
        tile = gen.get_tile(level, (column, row))  # PIL.Image
        # Return a plain numpy array: from_delayed expects each delayed
        # value to be an array chunk, not another dask array.
        return np.array(tile).transpose((1, 0, 2))

    # Computing the delayed ``.shape`` reads tile (0, 0) once, up front.
    sample_tile_shape = get_tile(max_level, 0, 0).shape.compute()
    rows = range(n_tiles_y - (0 if not remove_last else 1))
    cols = range(n_tiles_x - (0 if not remove_last else 1))
    tiles = [
        da.concatenate(
            [
                da.from_delayed(get_tile(max_level, col, row),
                                sample_tile_shape, np.uint8)
                for row in rows
            ],
            allow_unknown_chunksizes=allow_unknown_chunksizes,
            axis=1)
        for col in cols
    ]
    # The per-tile transpose is undone on the assembled array.
    arr = da.concatenate(
        tiles,
        allow_unknown_chunksizes=allow_unknown_chunksizes).transpose(
            [1, 0, 2])
    return arr