def filter_binary_closing(np_img, disk_size=3, iterations=1, output_type="uint8"):
  """
  Close a binary object (bool, float, or uint8). Closing is a dilation followed by an erosion.
  Closing can be used to remove small holes.

  Args:
    np_img: Binary image as a NumPy array.
    disk_size: Radius of the disk structuring element used for closing.
    iterations: How many times to repeat.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8) following binary closing.
  """
  t = Time()
  if np_img.dtype == "uint8":
    np_img = np_img / 255
  result = sc_morph.binary_closing(np_img, sk_morphology.disk(disk_size), iterations=iterations)
  if output_type == "bool":
    pass
  elif output_type == "float":
    result = result.astype(float)
  else:
    result = result.astype("uint8") * 255
  util.np_info(result, "Binary Closing", t.elapsed())
  return result
def singleprocess_filtered_images_to_tiles(display=False, save_summary=True, save_data=True, save_top_tiles=True,
                                           html=True, image_list=None):
  """
  Generate tile summaries and tiles for training images using a single process.

  Args:
    display: If True, display tile summary images to screen.
    save_summary: If True, save tile summary images.
    save_data: If True, save tile data to csv file.
    save_top_tiles: If True, save top tiles to files.
    html: If True, generate HTML page to display tiled images.
    image_list: Optionally specify a list of image slide names.
  """
  t = Time()
  print("Generating tile summaries\n")

  if image_list is not None:
    image_list, tile_summaries_dict = image_list_to_tiles(
        image_list, display, save_summary, save_data, save_top_tiles)
  else:
    num_training_slides = slide.get_num_training_slides()
    image_list, tile_summaries_dict = image_range_to_tiles(
        1, num_training_slides, display, save_summary, save_data, save_top_tiles)

  print("Time to generate tile summaries: %s\n" % str(t.elapsed()))

  if html:
    generate_tiled_html_result(image_list, tile_summaries_dict, save_data)
def filter_grays(rgb, tolerance=15, output_type="bool"):
  """
  Create a mask to filter out pixels where the red, green, and blue channel values are similar.

  Args:
    rgb: RGB image as a NumPy array.
    tolerance: Tolerance value to determine how similar the values must be in order to be filtered out.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array representing a mask where pixels with similar red, green, and blue values have been masked out.
  """
  t = Time()
  (h, w, c) = rgb.shape
  rgb = rgb.astype(int)  # avoid uint8 wraparound when taking channel differences
  rg_diff = abs(rgb[:, :, 0] - rgb[:, :, 1]) <= tolerance
  rb_diff = abs(rgb[:, :, 0] - rgb[:, :, 2]) <= tolerance
  gb_diff = abs(rgb[:, :, 1] - rgb[:, :, 2]) <= tolerance
  result = ~(rg_diff & rb_diff & gb_diff)

  if output_type == "bool":
    pass
  elif output_type == "float":
    result = result.astype(float)
  else:
    result = result.astype("uint8") * 255
  util.np_info(result, "Filter Grays", t.elapsed())
  return result
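# Illustrative usage sketch for filter_grays (not part of the original module): builds a tiny
# synthetic RGB patch, assuming NumPy is imported as `np` and the module-level Time/util helpers
# used by filter_grays are available. The example function name is hypothetical.
def _example_filter_grays():
  rgb = np.full((4, 4, 3), 128, dtype=np.uint8)  # uniform gray patch
  rgb[0, 0] = [200, 30, 30]                      # one clearly non-gray (reddish) pixel
  mask = filter_grays(rgb, tolerance=15, output_type="bool")
  # Gray pixels are masked out (False); only the reddish pixel remains True.
  return mask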
def filter_binary_dilation(np_img, disk_size=5, iterations=1, output_type="uint8"):
  """
  Dilate a binary object (bool, float, or uint8).

  Args:
    np_img: Binary image as a NumPy array.
    disk_size: Radius of the disk structuring element used for dilation.
    iterations: How many times to repeat the dilation.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8) where edges have been dilated.
  """
  t = Time()
  if np_img.dtype == "uint8":
    np_img = np_img / 255
  result = sc_morph.binary_dilation(np_img, sk_morphology.disk(disk_size), iterations=iterations)
  if output_type == "bool":
    pass
  elif output_type == "float":
    result = result.astype(float)
  else:
    result = result.astype("uint8") * 255
  util.np_info(result, "Binary Dilation", t.elapsed())
  return result
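# Illustrative usage sketch for filter_binary_dilation (not part of the original module): dilates a
# single seed pixel with a small disk, assuming NumPy is imported as `np` and scipy.ndimage /
# skimage.morphology are imported as sc_morph / sk_morphology as elsewhere in this module.
def _example_filter_binary_dilation():
  seed = np.zeros((9, 9), dtype=bool)
  seed[4, 4] = True  # single foreground pixel in the center
  dilated = filter_binary_dilation(seed, disk_size=2, iterations=1, output_type="bool")
  # The single pixel grows into a disk of radius 2 centered at (4, 4).
  return dilated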
def filter_green_pen(rgb, output_type="bool"):
  """
  Create a mask to filter out green pen marks from a slide.

  Args:
    rgb: RGB image as a NumPy array.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array representing the mask.
  """
  t = Time()
  result = filter_green(rgb, red_upper_thresh=150, green_lower_thresh=160, blue_lower_thresh=140) & \
           filter_green(rgb, red_upper_thresh=70, green_lower_thresh=110, blue_lower_thresh=110) & \
           filter_green(rgb, red_upper_thresh=45, green_lower_thresh=115, blue_lower_thresh=100) & \
           filter_green(rgb, red_upper_thresh=30, green_lower_thresh=75, blue_lower_thresh=60) & \
           filter_green(rgb, red_upper_thresh=195, green_lower_thresh=220, blue_lower_thresh=210) & \
           filter_green(rgb, red_upper_thresh=225, green_lower_thresh=230, blue_lower_thresh=225) & \
           filter_green(rgb, red_upper_thresh=170, green_lower_thresh=210, blue_lower_thresh=200) & \
           filter_green(rgb, red_upper_thresh=20, green_lower_thresh=30, blue_lower_thresh=20) & \
           filter_green(rgb, red_upper_thresh=50, green_lower_thresh=60, blue_lower_thresh=40) & \
           filter_green(rgb, red_upper_thresh=30, green_lower_thresh=50, blue_lower_thresh=35) & \
           filter_green(rgb, red_upper_thresh=65, green_lower_thresh=70, blue_lower_thresh=60) & \
           filter_green(rgb, red_upper_thresh=100, green_lower_thresh=110, blue_lower_thresh=105) & \
           filter_green(rgb, red_upper_thresh=165, green_lower_thresh=180, blue_lower_thresh=180) & \
           filter_green(rgb, red_upper_thresh=140, green_lower_thresh=140, blue_lower_thresh=150) & \
           filter_green(rgb, red_upper_thresh=185, green_lower_thresh=195, blue_lower_thresh=195)
  if output_type == "bool":
    pass
  elif output_type == "float":
    result = result.astype(float)
  else:
    result = result.astype("uint8") * 255
  util.np_info(result, "Filter Green Pen", t.elapsed())
  return result
def filter_adaptive_equalization(np_img, nbins=256, clip_limit=0.01, output_type="uint8"):
  """
  Filter image (gray or RGB) using adaptive equalization to increase contrast in the image, where contrast in local
  regions is enhanced.

  Args:
    np_img: Image as a NumPy array (gray or RGB).
    nbins: Number of histogram bins.
    clip_limit: Clipping limit where higher value increases contrast.
    output_type: Type of array to return (float or uint8).

  Returns:
    NumPy array (float or uint8) with contrast enhanced by adaptive equalization.
  """
  t = Time()
  adapt_equ = sk_exposure.equalize_adapthist(np_img, nbins=nbins, clip_limit=clip_limit)
  if output_type == "float":
    pass
  else:
    adapt_equ = (adapt_equ * 255).astype("uint8")
  util.np_info(adapt_equ, "Adapt Equalization", t.elapsed())
  return adapt_equ
def filter_remove_small_holes(np_img, min_size=3000, output_type="uint8"):
  """
  Filter image to remove small holes less than a particular size.

  Args:
    np_img: Image as a NumPy array of type bool.
    min_size: Remove small holes below this size.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8).
  """
  t = Time()
  # Note: newer scikit-image versions rename remove_small_holes' min_size parameter to area_threshold.
  rem_sm = sk_morphology.remove_small_holes(np_img, min_size=min_size)

  if output_type == "bool":
    pass
  elif output_type == "float":
    rem_sm = rem_sm.astype(float)
  else:
    rem_sm = rem_sm.astype("uint8") * 255

  util.np_info(rem_sm, "Remove Small Holes", t.elapsed())
  return rem_sm
def filter_canny(np_img, sigma=1, low_threshold=0, high_threshold=25, output_type="uint8"):
  """
  Filter image based on Canny algorithm edges.

  Args:
    np_img: Image as a NumPy array.
    sigma: Width (std dev) of Gaussian.
    low_threshold: Low hysteresis threshold value.
    high_threshold: High hysteresis threshold value.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8) representing Canny edge map (binary image).
  """
  t = Time()
  can = sk_feature.canny(np_img, sigma=sigma, low_threshold=low_threshold, high_threshold=high_threshold)
  if output_type == "bool":
    pass
  elif output_type == "float":
    can = can.astype(float)
  else:
    can = can.astype("uint8") * 255
  util.np_info(can, "Canny Edges", t.elapsed())
  return can
def save_tile_data(tile_summary):
  """
  Save tile data to csv file.

  Args:
    tile_summary: TileSummary object.
  """
  time = Time()

  csv = summary_title(tile_summary) + "\n" + summary_stats(tile_summary)

  csv += "\n\n\nTile Num,Row,Column,Tissue %,Tissue Quantity,Col Start,Row Start,Col End,Row End,Col Size,Row Size," + \
         "Color Factor,S and V Factor,Quantity Factor,Score\n"

  for t in tile_summary.tiles:
    line = "%d,%d,%d,%4.2f,%s,%d,%d,%d,%d,%d,%d,%4.0f,%4.2f,%4.2f,%0.4f\n" % (
        t.tile_num, t.r, t.c, t.tissue_percentage, t.tissue_quantity().name, t.c_s, t.r_s, t.c_e, t.r_e,
        t.c_e - t.c_s, t.r_e - t.r_s, t.color_factor, t.s_and_v_factor, t.quantity_factor, t.score)
    csv += line

  data_path = slide.get_tile_data_path(tile_summary.slide_name)
  csv_file = open(data_path, "w")
  csv_file.write(csv)
  csv_file.close()

  print("%-20s | Time: %-14s Name: %s" % ("Save Tile Data", str(time.elapsed()), data_path))
def singleprocess_training_slides_to_images():
  """
  Convert all WSI training slides to smaller images using a single process.
  """
  t = Time()
  num_train_images = get_num_training_slides()
  training_slide_range_to_images(1, num_train_images)
  t.elapsed_display()
def save_tile_summary_image(pil_img, slide_name):
  """
  Save a tile summary image to the file system.

  Args:
    pil_img: Image as a PIL Image.
    slide_name: The slide name.
  """
  t = Time()
  filepath = slide.get_tile_summary_image_path(slide_name)
  pil_img.save(filepath)
  print("%-20s | Time: %-14s Name: %s" % ("Save Tile Sum", str(t.elapsed()), filepath))
def save_top_tiles_on_original_image(pil_img, slide_name):
  """
  Save an image showing the top tiles on the original image to the file system.

  Args:
    pil_img: Image as a PIL Image.
    slide_name: The slide name.
  """
  t = Time()
  filepath = slide.get_top_tiles_on_original_image_path(slide_name)
  pil_img.save(filepath)
  print("%-20s | Time: %-14s Name: %s" % ("Save Top Orig", str(t.elapsed()), filepath))
def multiprocess_training_slides_to_images():
  """
  Convert all WSI training slides to smaller images using multiple processes (one process per core).
  Each process will process a range of slide numbers.
  """
  timer = Time()

  # how many processes to use
  num_processes = multiprocessing.cpu_count()
  pool = multiprocessing.Pool(num_processes)

  num_train_images = get_num_training_slides()
  if num_processes > num_train_images:
    num_processes = num_train_images
  images_per_process = num_train_images / num_processes

  print("Number of processes: " + str(num_processes))
  print("Number of training images: " + str(num_train_images))

  # each task specifies a range of slides
  tasks = []
  for num_process in range(1, num_processes + 1):
    start_index = (num_process - 1) * images_per_process + 1
    end_index = num_process * images_per_process
    start_index = int(start_index)
    end_index = int(end_index)
    tasks.append((start_index, end_index))
    if start_index == end_index:
      print("Task #" + str(num_process) + ": Process slide " + str(start_index))
    else:
      print("Task #" + str(num_process) + ": Process slides " + str(start_index) + " to " + str(end_index))

  # start tasks
  results = []
  for t in tasks:
    results.append(pool.apply_async(training_slide_range_to_images, t))

  for result in results:
    (start_ind, end_ind) = result.get()
    if start_ind == end_ind:
      print("Done converting slide %d" % start_ind)
    else:
      print("Done converting slides %d through %d" % (start_ind, end_ind))

  timer.elapsed_display()
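# Worked sketch of the slide-range arithmetic used above (not part of the original module): for a
# hypothetical 10 slides split across 4 processes, images_per_process is 2.5 and the tasks cover
# slides (1, 2), (3, 5), (6, 7), and (8, 10), so every slide number from 1 to 10 is assigned exactly once.
def _example_slide_range_partition(num_train_images=10, num_processes=4):
  images_per_process = num_train_images / num_processes
  ranges = []
  for num_process in range(1, num_processes + 1):
    start_index = int((num_process - 1) * images_per_process + 1)
    end_index = int(num_process * images_per_process)
    ranges.append((start_index, end_index))
  return ranges  # [(1, 2), (3, 5), (6, 7), (8, 10)]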
def filter_local_equalization(np_img, disk_size=50):
  """
  Filter image (gray) using local equalization, which uses local histograms based on the disk structuring element.

  Args:
    np_img: Image as a NumPy array.
    disk_size: Radius of the disk structuring element used for the local histograms.

  Returns:
    NumPy array with contrast enhanced using local equalization.
  """
  t = Time()
  # Note: newer scikit-image versions rename the selem parameter to footprint.
  local_equ = sk_filters.rank.equalize(np_img, selem=sk_morphology.disk(disk_size))
  util.np_info(local_equ, "Local Equalization", t.elapsed())
  return local_equ
def save_filtered_image(np_img, slide_name, filter_num, filter_text):
  """
  Save a filtered image to the file system.

  Args:
    np_img: Image as a NumPy array.
    slide_name: The slide name.
    filter_num: The filter number.
    filter_text: Descriptive text to add to the image filename.
  """
  t = Time()
  filepath = slide.get_filter_image_path(slide_name, filter_num, filter_text)
  pil_img = util.np_to_pil(np_img)
  pil_img.save(filepath)
  print("%-20s | Time: %-14s Name: %s" % ("Save Image", str(t.elapsed()), filepath))
def apply_filters_to_image(slide_name, save=True, display=False):
  """
  Apply a set of filters to an image and optionally save and/or display filtered images.

  Args:
    slide_name: The slide name.
    save: If True, save filtered images.
    display: If True, display filtered images to screen.

  Returns:
    Tuple consisting of 1) the resulting filtered image as a NumPy array, and 2) dictionary of image information
    (used for HTML page generation).
  """
  t = Time()
  print(f"Processing slide {slide_name}")

  info = dict()

  if save and not os.path.exists(slide.FILTER_DIR):
    os.makedirs(slide.FILTER_DIR)
  np_orig = slide.get_slide(slide_name)
  filtered_np_img = apply_image_filters(np_orig, slide_name, info, save=False, display=False)

  if save:
    result_path = slide.get_filter_image_result(slide_name)
    pil_img = util.np_to_pil(filtered_np_img)
    pil_img.save(result_path)

  return filtered_np_img, info
def slide_info(display_all_properties=False):
  """
  Display information (such as properties) about training images.

  Args:
    display_all_properties: If True, display all available slide properties.
  """
  t = Time()

  num_train_images = get_num_training_slides()
  obj_pow_20_list = []
  obj_pow_40_list = []
  obj_pow_other_list = []
  for slide_num in range(1, num_train_images + 1):
    slide_filepath = get_training_slide_path(slide_num)
    print("\nOpening Slide #%d: %s" % (slide_num, slide_filepath))
    slide = open_slide(slide_filepath)
    print("Level count: %d" % slide.level_count)
    print("Level dimensions: " + str(slide.level_dimensions))
    print("Level downsamples: " + str(slide.level_downsamples))
    print("Dimensions: " + str(slide.dimensions))
    objective_power = int(slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
    print("Objective power: " + str(objective_power))
    if objective_power == 20:
      obj_pow_20_list.append(slide_num)
    elif objective_power == 40:
      obj_pow_40_list.append(slide_num)
    else:
      obj_pow_other_list.append(slide_num)
    print("Associated images:")
    for ai_key in slide.associated_images.keys():
      print("  " + str(ai_key) + ": " + str(slide.associated_images.get(ai_key)))
    print("Format: " + str(slide.detect_format(slide_filepath)))
    if display_all_properties:
      print("Properties:")
      for prop_key in slide.properties.keys():
        print("  Property: " + str(prop_key) + ", value: " + str(slide.properties.get(prop_key)))

  print("\n\nSlide Magnifications:")
  print("  20x Slides: " + str(obj_pow_20_list))
  print("  40x Slides: " + str(obj_pow_40_list))
  print("  ??x Slides: " + str(obj_pow_other_list) + "\n")

  t.elapsed_display()
def filter_rgb_to_hsv(np_img, display_np_info=True):
  """
  Filter RGB channels to HSV (Hue, Saturation, Value).

  Args:
    np_img: RGB image as a NumPy array.
    display_np_info: If True, display NumPy array info and filter time.

  Returns:
    Image as NumPy array in HSV representation.
  """
  if display_np_info:
    t = Time()
  hsv = sk_color.rgb2hsv(np_img)
  if display_np_info:
    util.np_info(hsv, "RGB to HSV", t.elapsed())
  return hsv
def filter_complement(np_img, output_type="uint8"):
  """
  Obtain the complement of an image as a NumPy array.

  Args:
    np_img: Image as a NumPy array.
    output_type: Type of array to return (float or uint8).

  Returns:
    Complement image as NumPy array.
  """
  t = Time()
  if output_type == "float":
    complement = 1.0 - np_img
  else:
    complement = 255 - np_img
  util.np_info(complement, "Complement", t.elapsed())
  return complement
def filter_remove_small_objects(np_img, min_size=3000, avoid_overmask=True, overmask_thresh=95, output_type="uint8"):
  """
  Filter image to remove small objects (connected components) less than a particular minimum size. If avoid_overmask
  is True, this function can recursively call itself with progressively smaller minimum size objects to remove to
  reduce the amount of masking that this filter performs.

  Args:
    np_img: Image as a NumPy array of type bool.
    min_size: Minimum size of small object to remove.
    avoid_overmask: If True, avoid masking above the overmask_thresh percentage.
    overmask_thresh: If avoid_overmask is True, avoid masking above this threshold percentage value.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8).
  """
  t = Time()

  rem_sm = np_img.astype(bool)  # make sure mask is boolean
  rem_sm = sk_morphology.remove_small_objects(rem_sm, min_size=min_size)
  mask_percentage = mask_percent(rem_sm)
  if (mask_percentage >= overmask_thresh) and (min_size >= 1) and (avoid_overmask is True):
    new_min_size = min_size / 2
    # print("Mask percentage %3.2f%% >= overmask threshold %3.2f%% for Remove Small Objs size %d, so try %d" % (
    #   mask_percentage, overmask_thresh, min_size, new_min_size))
    # Recurse with a bool output so the type conversion below is applied only once.
    rem_sm = filter_remove_small_objects(np_img, new_min_size, avoid_overmask, overmask_thresh, "bool")
  np_img = rem_sm

  if output_type == "bool":
    pass
  elif output_type == "float":
    np_img = np_img.astype(float)
  else:
    np_img = np_img.astype("uint8") * 255

  util.np_info(np_img, "Remove Small Objs", t.elapsed())
  return np_img
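# Illustrative usage sketch for filter_remove_small_objects (not part of the original module):
# a large blob survives while an isolated pixel is removed, assuming NumPy is imported as `np` and
# the module-level mask_percent/Time/util helpers used by the filter are available.
def _example_filter_remove_small_objects():
  mask = np.zeros((20, 20), dtype=bool)
  mask[2:7, 2:7] = True   # 25-pixel blob, kept
  mask[15, 15] = True     # isolated pixel, removed
  cleaned = filter_remove_small_objects(mask, min_size=10, output_type="bool")
  # The resulting mask percentage stays below the default overmask_thresh of 95,
  # so the function does not recurse with a smaller min_size here.
  return cleaned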
def filter_green_channel(np_img, green_thresh=200, avoid_overmask=True, overmask_thresh=90, output_type="bool"):
  """
  Create a mask to filter out pixels with a green channel value greater than a particular threshold, since hematoxylin
  and eosin are purplish and pinkish, which do not have much green to them.

  Args:
    np_img: RGB image as a NumPy array.
    green_thresh: Green channel threshold value (0 to 255). If value is greater than green_thresh, mask out pixel.
    avoid_overmask: If True, avoid masking above the overmask_thresh percentage.
    overmask_thresh: If avoid_overmask is True, avoid masking above this threshold percentage value.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array representing a mask where pixels above a particular green channel threshold have been masked out.
  """
  t = Time()

  g = np_img[:, :, 1]
  gr_ch_mask = (g < green_thresh) & (g > 0)
  mask_percentage = mask_percent(gr_ch_mask)
  if (mask_percentage >= overmask_thresh) and (green_thresh < 255) and (avoid_overmask is True):
    new_green_thresh = math.ceil((255 - green_thresh) / 2 + green_thresh)
    # print("Mask percentage %3.2f%% >= overmask threshold %3.2f%% for Remove Green Channel green_thresh=%d, so try %d" % (
    #   mask_percentage, overmask_thresh, green_thresh, new_green_thresh))
    # Recurse with a bool output so the type conversion below is applied only once.
    gr_ch_mask = filter_green_channel(np_img, new_green_thresh, avoid_overmask, overmask_thresh, "bool")
  np_img = gr_ch_mask

  if output_type == "bool":
    pass
  elif output_type == "float":
    np_img = np_img.astype(float)
  else:
    np_img = np_img.astype("uint8") * 255

  util.np_info(np_img, "Filter Green Channel", t.elapsed())
  return np_img
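# Illustrative usage sketch for filter_green_channel (not part of the original module): one pixel with
# a high green value is masked out, a purplish pixel is kept, and pixels whose green channel is 0 are
# also masked out because of the g > 0 condition. Assumes NumPy is imported as `np` and the
# module-level mask_percent/Time/util helpers are available.
def _example_filter_green_channel():
  rgb = np.zeros((2, 2, 3), dtype=np.uint8)
  rgb[0, 0] = [180, 240, 200]  # green channel >= 200, masked out (False)
  rgb[0, 1] = [160, 60, 140]   # purplish pixel, kept (True)
  mask = filter_green_channel(rgb, green_thresh=200, output_type="bool")
  return mask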
def filter_contrast_stretch(np_img, low=40, high=60):
  """
  Filter image (gray or RGB) using contrast stretching to increase contrast in the image based on the intensities in
  a specified range.

  Args:
    np_img: Image as a NumPy array (gray or RGB).
    low: Range low value (0 to 255).
    high: Range high value (0 to 255).

  Returns:
    Image as NumPy array with contrast enhanced.
  """
  t = Time()
  low_p, high_p = np.percentile(np_img, (low * 100 / 255, high * 100 / 255))
  contrast_stretch = sk_exposure.rescale_intensity(np_img, in_range=(low_p, high_p))
  util.np_info(contrast_stretch, "Contrast Stretch", t.elapsed())
  return contrast_stretch
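# Illustrative usage sketch for filter_contrast_stretch (not part of the original module): the low/high
# values (0 to 255) are mapped to percentiles, so low=40 and high=60 correspond roughly to the 15.7th
# and 23.5th percentiles of the pixel intensities. Assumes NumPy is imported as `np`.
def _example_filter_contrast_stretch():
  gradient = np.arange(256, dtype=np.uint8).reshape(16, 16)  # simple intensity ramp
  stretched = filter_contrast_stretch(gradient, low=40, high=60)
  # Intensities between the two percentile values are stretched to span the full
  # uint8 output range; values outside that band are clipped.
  return stretched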
def singleprocess_apply_filters_to_images(save=True, display=False, html=False, image_name_list=None):
  """
  Apply a set of filters to training images and optionally save and/or display the filtered images.

  Args:
    save: If True, save filtered images.
    display: If True, display filtered images to screen.
    html: If True, generate HTML page to display filtered images.
    image_name_list: Optionally specify a list of image slide names.
  """
  t = Time()
  print("Applying filters to images\n")

  if image_name_list is not None:
    _, info = apply_filters_to_image_list(image_name_list, save, display)

  print("Time to apply filters to all images: %s\n" % str(t.elapsed()))
def filter_rgb_to_grayscale(np_img, output_type="uint8"):
  """
  Convert an RGB NumPy array to a grayscale NumPy array.

  Shape (h, w, c) to (h, w).

  Args:
    np_img: RGB Image as a NumPy array.
    output_type: Type of array to return (float or uint8).

  Returns:
    Grayscale image as NumPy array with shape (h, w).
  """
  t = Time()
  # Another common RGB ratio possibility: [0.299, 0.587, 0.114]
  grayscale = np.dot(np_img[..., :3], [0.2125, 0.7154, 0.0721])
  if output_type != "float":
    grayscale = grayscale.astype("uint8")
  util.np_info(grayscale, "Gray", t.elapsed())
  return grayscale
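# Illustrative usage sketch for filter_rgb_to_grayscale (not part of the original module): with the
# [0.2125, 0.7154, 0.0721] weights, a pure red pixel maps to roughly 54 and a pure green pixel to
# roughly 182 in uint8 output. Assumes NumPy is imported as `np`.
def _example_filter_rgb_to_grayscale():
  rgb = np.array([[[255, 0, 0], [0, 255, 0]]], dtype=np.uint8)
  gray = filter_rgb_to_grayscale(rgb)  # approximately [[54, 182]]
  return gray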
def filter_hed_to_eosin(np_img, output_type="uint8"):
  """
  Obtain Eosin channel from HED NumPy array and rescale it (for example, to 0 to 255 for uint8) for increased
  contrast.

  Args:
    np_img: HED image as a NumPy array.
    output_type: Type of array to return (float or uint8).

  Returns:
    NumPy array for Eosin channel.
  """
  t = Time()
  eosin = np_img[:, :, 1]
  if output_type == "float":
    eosin = sk_exposure.rescale_intensity(eosin, out_range=(0.0, 1.0))
  else:
    eosin = (sk_exposure.rescale_intensity(eosin, out_range=(0, 255))).astype("uint8")
  util.np_info(eosin, "HED to Eosin", t.elapsed())
  return eosin
def filter_kmeans_segmentation(np_img, compactness=10, n_segments=800):
  """
  Use K-means segmentation (color/space proximity) to segment an RGB image where each segment is colored based on
  the average color for that segment.

  Args:
    np_img: RGB image as a NumPy array.
    compactness: Color proximity versus space proximity factor.
    n_segments: The number of segments.

  Returns:
    NumPy array (uint8) representing 3-channel RGB image where each segment has been colored based on the average
    color for that segment.
  """
  t = Time()
  labels = sk_segmentation.slic(np_img, compactness=compactness, n_segments=n_segments)
  result = sk_color.label2rgb(labels, np_img, kind='avg')
  util.np_info(result, "K-Means Segmentation", t.elapsed())
  return result
def filter_otsu_threshold(np_img, output_type="uint8"):
  """
  Compute Otsu threshold on image as a NumPy array and return binary image based on pixels above threshold.

  Args:
    np_img: Image as a NumPy array.
    output_type: Type of array to return (bool, float, or uint8).

  Returns:
    NumPy array (bool, float, or uint8) where True, 1.0, and 255 represent a pixel above Otsu threshold.
  """
  t = Time()
  otsu_thresh_value = sk_filters.threshold_otsu(np_img)
  otsu = (np_img > otsu_thresh_value)
  if output_type == "bool":
    pass
  elif output_type == "float":
    otsu = otsu.astype(float)
  else:
    otsu = otsu.astype("uint8") * 255
  util.np_info(otsu, "Otsu Threshold", t.elapsed())
  return otsu
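# Illustrative usage sketch for filter_otsu_threshold (not part of the original module): a bimodal
# image is split at a threshold between its two modes, so the darker half becomes False and the
# brighter half becomes True. Assumes NumPy is imported as `np` and skimage.filters is imported as
# sk_filters as elsewhere in this module.
def _example_filter_otsu_threshold():
  bimodal = np.array([10] * 50 + [200] * 50, dtype=np.uint8).reshape(10, 10)
  binary = filter_otsu_threshold(bimodal, output_type="bool")
  # Pixels with value 200 fall above the Otsu threshold and are therefore True.
  return binary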
def filter_rgb_to_hed(np_img, output_type="uint8"):
  """
  Filter RGB channels to HED (Hematoxylin - Eosin - Diaminobenzidine) channels.

  Args:
    np_img: RGB image as a NumPy array.
    output_type: Type of array to return (float or uint8).

  Returns:
    NumPy array (float or uint8) with HED channels.
  """
  t = Time()
  hed = sk_color.rgb2hed(np_img)
  if output_type == "float":
    hed = sk_exposure.rescale_intensity(hed, out_range=(0.0, 1.0))
  else:
    hed = (sk_exposure.rescale_intensity(hed, out_range=(0, 255))).astype("uint8")
  util.np_info(hed, "RGB to HED", t.elapsed())
  return hed
def filter_green(rgb, red_upper_thresh, green_lower_thresh, blue_lower_thresh, output_type="bool",
                 display_np_info=False):
  """
  Create a mask to filter out greenish colors, where the mask is based on a pixel being below a red channel threshold
  value, above a green channel threshold value, and above a blue channel threshold value. Note that for the green ink,
  the green and blue channels tend to track together, so we use a blue channel lower threshold value rather than a
  blue channel upper threshold value.

  Args:
    rgb: RGB image as a NumPy array.
    red_upper_thresh: Red channel upper threshold value.
    green_lower_thresh: Green channel lower threshold value.
    blue_lower_thresh: Blue channel lower threshold value.
    output_type: Type of array to return (bool, float, or uint8).
    display_np_info: If True, display NumPy array info and filter time.

  Returns:
    NumPy array representing the mask.
  """
  if display_np_info:
    t = Time()
  r = rgb[:, :, 0] < red_upper_thresh
  g = rgb[:, :, 1] > green_lower_thresh
  b = rgb[:, :, 2] > blue_lower_thresh
  result = ~(r & g & b)
  if output_type == "bool":
    pass
  elif output_type == "float":
    result = result.astype(float)
  else:
    result = result.astype("uint8") * 255
  if display_np_info:
    util.np_info(result, "Filter Green", t.elapsed())
  return result
def save_display_tile(tile, save=True, display=False):
  """
  Save and/or display a tile image.

  Args:
    tile: Tile object.
    save: If True, save tile image.
    display: If True, display tile image.
  """
  tile_pil_img = tile_to_pil_tile(tile)

  if save:
    t = Time()
    img_path = slide.get_tile_image_path(tile)
    dir = os.path.dirname(img_path)
    if not os.path.exists(dir):
      os.makedirs(dir)
    tile_pil_img.save(img_path)
    print("%-20s | Time: %-14s Name: %s" % ("Save Tile", str(t.elapsed()), img_path))

  if display:
    tile_pil_img.show()