def preprocess_image3d(args, img):
    # Background removal
    if args.t is not None:
        verbose_print(args, f"Performing background removal with threshold {args.t}")
        img = remove_background(img, args.t)

    # Histogram equalization
    if args.k is not None:
        if args.k == 0:
            verbose_print(args, "Performing histogram equalization with default kernel size")
            kernel_size = None
        else:
            verbose_print(args, f"Performing histogram equalization with kernel size {args.k}")
            kernel_size = args.k
        img = clahe(img, kernel_size=kernel_size)

    # Normalize and convert to float
    if args.float:
        img = rescale_intensity(img_as_float32(img))
        verbose_print(args, f"Converted to normalized float32: min {img.min():.3f}, max {img.max():.3f}")

    # Denoising
    if args.s is not None:
        verbose_print(args, f"Performing noise removal with sigma {args.s} and wavelet {args.w}")
        img = denoise(img, args.s, args.w)

    # Save the result to a Zarr array
    verbose_print(args, f"Saving result to {args.zarr}")
    arr = io.new_zarr(args.zarr, shape=img.shape, dtype=img.dtype, chunks=tuple(args.c))
    arr[:] = img

    return img
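
# Hypothetical usage sketch for preprocess_image3d (not part of the pipeline).
# The Namespace fields mirror how the function reads args above; the real values
# come from the CLI parser, which is not shown in this excerpt. It also assumes
# verbose_print checks an `args.verbose` flag.
def _example_preprocess3d():
    import argparse
    args = argparse.Namespace(t=0.05, k=0, s=1.0, w='db4', float=True,
                              zarr='preprocessed.zarr', c=[64, 256, 256],
                              verbose=True)
    img = io.imread('nuclei_3d.tif')  # hypothetical single 3D TIFF
    return preprocess_image3d(args, img)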
def segment_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    # Open probability map Zarr array
    verbose_print(args, f'Segmenting nuclei in {args.input}')
    prob_arr = io.open(args.input, mode='r')
    shape, dtype, chunks = prob_arr.shape, prob_arr.dtype, prob_arr.chunks
    verbose_print(args, f'Opened image: {shape} {dtype}')
    if dtype != 'float32':
        warnings.warn('Input dtype is not float32... may not have passed a probability map')

    # Load nuclei centroids
    centroids = np.load(args.centroids)

    # Create foreground mask by thresholding the probability map
    verbose_print(args, f'Thresholding probability at {args.t}, writing foreground to {args.foreground}')
    foreground_arr = io.new_zarr(args.foreground, shape=shape, chunks=chunks, dtype='uint8')
    f = partial(_threshold_chunk, threshold=args.t, output=foreground_arr)
    utils.pmap_chunks(f, prob_arr, chunks, 1, use_imap=True)

    # Add watershed lines to the foreground mask to break up touching nuclei
    verbose_print(args, f'Performing watershed, writing binary segmentation to {args.output}')
    binary_seg = io.new_zarr(args.output, shape, chunks, 'uint8')
    watershed_centers_parallel(prob_arr,
                               centers=centroids,
                               mask=foreground_arr,
                               output=binary_seg,
                               chunks=chunks,
                               overlap=args.o,
                               nb_workers=nb_workers)

    verbose_print(args, 'Nuclei segmentation done!')
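
# `_threshold_chunk` is defined elsewhere in this module. Below is a minimal
# sketch of what it plausibly does, assuming utils.pmap_chunks calls the mapped
# function as f(arr, start_coord, chunk_shape); the real signature may differ.
def _threshold_chunk_sketch(prob_arr, start, chunks, threshold, output):
    # Clip the chunk window to the array bounds
    stop = np.minimum(np.asarray(start) + np.asarray(chunks), prob_arr.shape)
    idx = tuple(slice(a, b) for a, b in zip(start, stop))
    # Write a binary foreground mask for this chunk
    output[idx] = (prob_arr[idx] > threshold).astype('uint8')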
def detect_main(args):
    if args.voxel_size is not None and args.output_um is None:
        raise ValueError('A path to output_um array must be specified if given voxel dimensions')
    elif args.voxel_size is None and args.output_um is not None:
        raise ValueError('Voxel size must be specified if path to output_um is given')

    if args.n < 0:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = int(args.n)

    # Open nuclei Zarr array
    verbose_print(args, f'Detecting nuclei in {args.input}')
    arr = io.open(args.input, mode='r')
    shape, dtype, chunks = arr.shape, arr.dtype, arr.chunks
    verbose_print(args, f'Opened image: {shape} {dtype}')

    # Create probability Zarr array
    prob_arr = io.new_zarr(args.probability, shape=shape, chunks=chunks, dtype='float32')

    # Detect nuclei
    centroids = detection.detect_nuclei_parallel(arr,
                                                 sigma=args.g,
                                                 min_intensity=args.m,
                                                 steepness=args.s,
                                                 offset=args.b,
                                                 I0=args.r,
                                                 stdev=args.x,
                                                 prob_thresh=args.p,
                                                 min_dist=args.d,
                                                 chunks=tuple(args.c),
                                                 overlap=args.o,
                                                 nb_workers=nb_workers,  # GPU requires one worker
                                                 prob_output=prob_arr)
    nb_centroids = centroids.shape[0]
    verbose_print(args, f'Found {nb_centroids} nuclei centroids')

    # Convert to micron if possible
    if args.voxel_size is not None:
        voxel_size = utils.read_voxel_size(args.voxel_size)
        centroids_um = centroids * np.asarray(voxel_size)

    # Save centroids
    np.save(args.output, centroids)
    verbose_print(args, f'Saved centroids to {args.output}')
    if args.output_um is not None:
        np.save(args.output_um, centroids_um)
        verbose_print(args, f'Saved centroids in micron to {args.output_um}')

    verbose_print(args, 'Nuclei detection done!')
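
# Hypothetical invocation of detect_main outside the CLI. Every value below is a
# placeholder; the actual defaults and flag meanings live in the argument parser,
# which is not shown in this excerpt.
def _example_detect():
    import argparse
    args = argparse.Namespace(input='nuclei.zarr', probability='prob.zarr',
                              output='centroids.npy', output_um='centroids_um.npy',
                              voxel_size='voxel_size.csv', g=(1.0, 2.0, 2.0),
                              m=0.1, s=500, b=-1, r=1.0, x=None, p=0.5, d=3,
                              c=[64, 256, 256], o=8, n=-1, verbose=True)
    detect_main(args)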
def old_preprocessing_main(args):
    if args.t is None and args.s is None and args.k is None:
        raise ValueError('No preprocessing tasks were specified')

    verbose_print(args, f"Preprocessing {args.input}")

    if os.path.isdir(args.input):
        # Load series of 2D TIFFs and process in parallel
        paths, filenames = tifs_in_dir(args.input)

        img = io.imread(paths[0])
        shape = (len(paths), *img.shape)
        if args.float:
            dtype = 'float32'
        else:
            dtype = img.dtype
        arr = io.new_zarr(args.zarr, shape=shape, dtype=dtype, chunks=tuple(args.c))

        args_list = []
        for i, (path, _) in enumerate(zip(paths, filenames)):
            args_list.append((args, path, arr, i))
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            list(tqdm.tqdm(pool.imap_unordered(_preprocess_image2d, args_list), total=len(args_list)))

        if args.p is not None:
            before = io.imread(paths[args.p])
            after = arr[args.p]

    elif os.path.isfile(args.input):
        # Load 3D TIFF and process in memory
        img = io.imread(args.input)

        # Keep reference to before image if plotting
        if args.p is not None:
            before = np.copy(img[args.p])

        verbose_print(args, f"Loaded image: {img.shape} {img.dtype}")
        img = preprocess_image3d(args, img)

        if args.p is not None:
            after = np.copy(img[args.p])

    else:
        raise ValueError('Input is not a valid directory or file')

    # Show A/B plot
    if args.p is not None:
        plt.subplot(121)
        plt.imshow(before)
        plt.title('Before')
        plt.subplot(122)
        plt.imshow(after)
        plt.title('After')
        plt.show()

    verbose_print(args, "Preprocessing done!")
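
# `_preprocess_image2d` is defined elsewhere in this module. A minimal sketch,
# assuming it applies the same steps as preprocess_image3d to a single 2D slice
# and writes the result into the shared Zarr array; the real worker may differ.
def _preprocess_image2d_sketch(inputs):
    args, path, arr, i = inputs
    img = io.imread(path)
    if args.t is not None:
        img = remove_background(img, args.t)
    if args.k is not None:
        img = clahe(img, kernel_size=(None if args.k == 0 else args.k))
    if args.float:
        img = rescale_intensity(img_as_float32(img))
    if args.s is not None:
        img = denoise(img, args.s, args.w)
    arr[i] = img  # write this slice into the output volume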
def convert_main(args):
    nb_workers = _check_workers(args)

    verbose_print(args, f"Converting {args.input} to Zarr")

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Batch the TIFF paths so each batch fills one chunk along the z-axis
    paths_chunked = [paths[pos:pos + args.c[0]] for pos in range(0, len(paths), args.c[0])]

    img = io.imread(paths[0])
    shape = (len(paths), *img.shape)
    dtype = img.dtype
    chunks = tuple(args.c)

    arr = io.new_zarr(args.output, shape=shape, dtype=dtype, chunks=chunks)
    verbose_print(args, f"Writing to {args.output}")

    args_list = []
    for i, paths_batch in enumerate(paths_chunked):
        args_list.append((paths_batch, i, chunks[0], arr))
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(_convert_batch, args_list), total=len(args_list)))

    verbose_print(args, "Conversion done!")
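
# `_check_workers` and `_convert_batch` are defined elsewhere in this module.
# Minimal sketches under stated assumptions: _check_workers falls back to all
# CPUs when no worker count is given, and each batch of 2D TIFFs fills one
# chunk-deep slab of the output Zarr array.
def _check_workers_sketch(args):
    return multiprocessing.cpu_count() if args.n is None else int(args.n)

def _convert_batch_sketch(inputs):
    paths_batch, i, z_chunk, arr = inputs
    stack = np.stack([io.imread(p) for p in paths_batch])  # (batch, y, x)
    start = i * z_chunk
    arr[start:start + stack.shape[0]] = stack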
def fluorescence_main(args):
    if isinstance(args.inputs, list):
        inputs = args.inputs
    else:
        inputs = [args.inputs]
    nb_images = len(inputs)
    verbose_print(args, f'Passed {nb_images} images to measure fluorescence')

    # Load centroids
    centroids = np.load(args.centroids)

    # Initialize output arrays
    mfis = np.zeros((centroids.shape[0], nb_images))
    stdevs = np.zeros((centroids.shape[0], nb_images))

    for i, path in enumerate(inputs):
        # Open image
        arr = io.open(path, mode='r')
        shape, dtype, chunks = arr.shape, arr.dtype, arr.chunks
        verbose_print(args, f'Sampling from {path}: {shape} {dtype}')

        # Sample the image, optionally smoothing it first in a temporary array
        if args.g is not None:
            verbose_print(args, f'Smoothing {path} with sigma {tuple(args.g)}')
            # Passing an absolute path as prefix places the scratch directory
            # alongside the working directory instead of in the system temp dir
            with tempfile.TemporaryDirectory(prefix=os.path.abspath('.')) as temp_path:
                smoothed_arr = io.new_zarr(temp_path, shape, chunks, dtype)
                gaussian_blur_parallel(arr, args.g, smoothed_arr, arr.chunks, args.o,
                                       args.w)  # Too many workers gives a Zarr race condition
                verbose_print(args, f'Sampling fluorescence from smoothed {path}')
                intensities = nuclei_centered_intensities(smoothed_arr, centroids, args.r,
                                                          mode=args.m, nb_workers=args.w)
            # Temporary array is deleted when the context ends
        else:
            intensities = nuclei_centered_intensities(arr, centroids, args.r,
                                                      mode=args.m, nb_workers=args.w)

        # Compute per-nucleus statistics
        mfis[:, i] = calculate_mfi(intensities)
        stdevs[:, i] = calculate_stdev(intensities)

    # Make output folder
    os.makedirs(args.output, exist_ok=True)

    # Save numpy arrays of MFIs and stdevs
    mfi_path = os.path.join(args.output, 'nuclei_mfis.npy')
    np.save(mfi_path, mfis)
    verbose_print(args, f'MFIs written to {mfi_path}')
    stdev_path = os.path.join(args.output, 'nuclei_stdevs.npy')
    np.save(stdev_path, stdevs)
    verbose_print(args, f'StDevs written to {stdev_path}')

    # Save a CSV of fluorescence statistics for each input image.
    # normpath strips any trailing slash (e.g. 'sox2.zarr/') so basename
    # yields 'sox2.zarr' rather than an empty string
    basenames = [os.path.basename(os.path.normpath(path)).split('.')[0] for path in inputs]
    csv_names = ['fluorescence_' + str(base) + '.csv' for base in basenames]
    csv_paths = [os.path.join(args.output, name) for name in csv_names]
    for i, (base, path) in enumerate(zip(basenames, csv_paths)):
        df = pd.DataFrame({'mfi': mfis[:, i], 'stdev': stdevs[:, i]})
        df.to_csv(path)
        verbose_print(args, f'Fluorescence statistics for {base} written to {path}')

    verbose_print(args, 'Fluorescence measurements done!')
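
# Plausible one-line implementations of calculate_mfi / calculate_stdev, which
# are imported from elsewhere. This assumes nuclei_centered_intensities returns
# one array of sampled voxel values per nucleus; the real functions may differ.
def _calculate_mfi_sketch(intensities):
    return np.asarray([np.mean(x) for x in intensities])  # mean fluorescence per nucleus

def _calculate_stdev_sketch(intensities):
    return np.asarray([np.std(x) for x in intensities])  # intensity spread per nucleus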