def sample_main(args):
    if isinstance(args.inputs, list):
        inputs = args.inputs
    else:
        inputs = [args.inputs]
    if isinstance(args.outputs, list):
        outputs = args.outputs
    else:
        outputs = [args.outputs]
    if len(inputs) != len(outputs):
        raise ValueError("Number of inputs and outputs must match")

    verbose_print(args, f'Taking {args.samples} random samples from {inputs}')

    np.random.seed(args.seed)
    verbose_print(args, f'Random seed set to {args.seed}')

    # Load arrays
    input_arrs = [np.load(path) for path in inputs]

    # Randomly sample
    sampled_data, idx = randomly_sample(args.samples, *input_arrs, return_idx=True)

    # Save sample
    for output, samples in zip(outputs, sampled_data):
        np.save(output, samples)
        verbose_print(args, f'Saved samples to {output}')
    np.save(args.index, idx)
    verbose_print(args, f'Saved sample index to {args.index}')

    verbose_print(args, f'Random sampling done!')
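# A minimal sketch of what the `randomly_sample` helper used above might look like,
# inferred only from how it is called (nb_samples, *arrays, return_idx=True); the
# actual implementation in this codebase may differ.
def randomly_sample(nb_samples, *arrays, return_idx=False):
    """Draw the same random subset of rows from each input array."""
    idx = np.random.choice(len(arrays[0]), size=nb_samples, replace=False)
    sampled = tuple(arr[idx] for arr in arrays)
    if return_idx:
        return sampled, idx
    return sampled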
def _check_input(args):
    if os.path.isdir(args.input):
        verbose_print(args, f"Preprocessing 2D TIFFs in {args.input}")
        is_folder = True
    elif os.path.isfile(args.input):
        verbose_print(args, f"Preprocessing 3D TIFF {args.input}")
        is_folder = False
    else:
        raise ValueError('Input is not a valid directory or file')
    return is_folder
def tsne_main(args):
    verbose_print(args, f'Loading niche labels from {args.labels}')
    labels = np.load(args.labels)

    verbose_print(args, f'Running t-SNE based on {args.proximity}')
    proximities = np.load(args.proximity)
    x_tsne = TSNE(n_components=2, n_jobs=-1, perplexity=800, learning_rate=100).fit_transform(proximities)

    if args.plot:
        # Show t-SNE
        for i in range(4):
            idx = np.where(labels == i)[0]
            if len(idx) == 0:
                continue
            plt.plot(x_tsne[idx, 0], x_tsne[idx, 1], '.', label=f'Cluster {i}')
        plt.legend()
        plt.show()

    # Save the t-SNE coordinates
    np.save(args.tsne, x_tsne)
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')

    verbose_print(args, f't-SNE embedding done!')
def radial_main(args):
    verbose_print(args, f'Calculating radial profiles for {args.centroids}')

    # Load centroids and cell-type labels
    centroids = np.load(args.centroids)
    celltypes = np.load(args.celltypes)

    # May want to add subsampling here...

    # Find neighbors within a given radius
    nbrs = fit_neighbors(centroids)
    distances, indices = query_radius(nbrs, centroids, args.r)

    # Compute profiles for each cell-type
    profiles = np.zeros((celltypes.shape[-1], celltypes.shape[0], args.b))
    for i, labels in enumerate(celltypes.T):
        verbose_print(args, f'Counting cell-type {i}')
        profiles[i] = radial_profile(centroids, distances, indices, args.r, args.b, labels)

    # Save results
    np.save(args.output, profiles)
    verbose_print(args, f'Radial profiles saved to {args.output}')

    verbose_print(args, f'Calculating radial profiles done!')
def profiles_main(args):
    verbose_print(args, f'Calculating profiles from {args.mesh}')

    # Get vertices and normals
    mesh = load_mesh(args.mesh)
    verts = mesh['verts']
    normals = mesh['normals']

    # Load centers and labels
    centroids_um = np.load(args.centroids)
    labels = np.load(args.labels)
    sox2_labels = labels[:, 0]
    tbr1_labels = labels[:, 1]

    # Plot mesh
    if args.plot:
        plot_mesh(mesh['verts'], mesh['faces'])
        plot_nuclei(centroids_um, 10000, sox2_labels, tbr1_labels, scale_factor=8)
        mlab.show()

    # Calculate profiles
    verbose_print(args, f'Progress:')
    profiles = compute_profiles(verts, normals, args.l, args.b, args.r, centroids_um, sox2_labels, tbr1_labels)

    # Save the profiles
    np.save(args.output, profiles)
    verbose_print(args, f'Profiles saved to {args.output}')

    verbose_print(args, 'Calculating profiles done!')
def combine_main(args):
    verbose_print(args, f'Combining profiles based on {args.input}')

    # Get full paths for sampled profiles from analysis CSV
    parent_dir = os.path.abspath(os.path.join(args.input, os.pardir))
    df = pd.read_csv(args.input, index_col=0)
    paths = [
        os.path.join(parent_dir, df.loc[folder]['type'], folder, 'dataset', args.name)
        for folder in df.index
    ]

    # Adapted from niche.combine_main
    input_arrays = [np.load(path) for path in paths]
    combined = np.concatenate(input_arrays, axis=args.a)
    verbose_print(args, f'Saving combined features to {args.output} with shape {combined.shape}')
    np.save(args.output, combined)

    verbose_print(args, f'Saving organoid labels to {args.sample}')
    names = np.concatenate([i * np.ones(len(arr)) for i, arr in enumerate(input_arrays)])
    np.save(args.sample, names)

    verbose_print(args, f'Combining profiles done!')
def combine_main(args):
    verbose_print(args, f'Combining multiscale features')

    # Identify all datasets to be analyzed if passed an analysis CSV
    if os.path.splitext(args.inputs[0])[1] == '.csv':
        analysis = pd.read_csv(args.inputs[0], index_col=0)
        parent_dir = os.path.abspath(os.path.join(os.path.abspath(args.inputs[0]), os.pardir))
        args.inputs = [os.path.join(parent_dir, t, f) for t, f in zip(analysis['type'], analysis.index)]

    dfs = []
    for organoid in args.inputs:
        path = os.path.join(organoid, 'organoid_features.xlsx')
        dfs.append(pd.read_excel(path, index_col=0, skiprows=1))
    df = pd.concat(dfs, axis=1, sort=False)
    df.to_excel(args.output)

    verbose_print(args, f'Combining multiscale features done!')
def segment_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    # Open probability map Zarr array
    verbose_print(args, f'Segmenting nuclei in {args.input}')
    prob_arr = io.open(args.input, mode='r')
    shape, dtype, chunks = prob_arr.shape, prob_arr.dtype, prob_arr.chunks
    verbose_print(args, f'Opened image: {shape} {dtype}')
    if dtype != 'float32':
        warnings.warn('Input dtype is not float32... may not have passed a probability map')

    # Load nuclei centroids
    centroids = np.load(args.centroids)

    # Create foreground mask by thresholding the probability map
    verbose_print(args, f'Thresholding probability at {args.t}, writing foreground to {args.foreground}')
    foreground_arr = io.new_zarr(args.foreground, shape=shape, chunks=chunks, dtype='uint8')
    f = partial(_threshold_chunk, threshold=args.t, output=foreground_arr)
    utils.pmap_chunks(f, prob_arr, chunks, 1, use_imap=True)

    # Add watershed lines to the foreground mask to break up touching nuclei
    verbose_print(args, f'Performing watershed, writing binary segmentation to {args.output}')
    binary_seg = io.new_zarr(args.output, shape, chunks, 'uint8')
    watershed_centers_parallel(prob_arr,
                               centers=centroids,
                               mask=foreground_arr,
                               output=binary_seg,
                               chunks=chunks,
                               overlap=args.o,
                               nb_workers=nb_workers)

    verbose_print(args, 'Nuclei segmentation done!')
def rescale_main(args):
    nb_workers = _check_workers(args)

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Load histogram and compute percentile from CDF
    df = pd.read_csv(args.histogram)
    bins = df['intensity'].to_numpy()
    counts = df['count'].to_numpy()
    total = counts.sum()
    cdf = np.cumsum(counts)
    target = total * (args.p / 100)
    abs_diff = np.abs(cdf - target)
    idx = np.where(abs_diff == abs_diff.min())[0]
    max_val = bins[idx][0]
    # min_val, max_val = bins[0], bins[-1]

    # Make the output folder
    os.makedirs(args.output, exist_ok=True)

    # Rescale images in parallel
    verbose_print(args, f"Rescaling images with {nb_workers} workers:")
    args_list = []
    for path, filename in zip(paths, filenames):
        args_list.append((path, args.t, max_val, args.output, filename, args.c))
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(_rescale_image, args_list), total=len(paths)))

    verbose_print(args, f"Rescaling done!")
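# A plausible sketch of the `_rescale_image` worker used above, inferred from the
# argument tuple it receives (path, threshold, max_val, output, filename, compress).
# The clipping and normalization details are assumptions; the real worker may differ.
def _rescale_image(inputs):
    path, threshold, max_val, output, filename, compress = inputs
    img = io.imread(path).astype(np.float32)
    img = np.clip(img, threshold, max_val)  # Assumed: clip to [threshold, max_val]
    img = (img - threshold) / (max_val - threshold)  # Rescale into [0, 1]
    io.imsave(os.path.join(output, filename), img, compress=compress)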
def foreground_main(args):
    verbose_print(args, f'Segmenting foreground from {args.input}')

    # Load the input image
    data = io.imread(args.input)

    # Smoothing
    if args.g is not None:
        data = gaussian_blur(data, args.g).astype(data.dtype)

    # Threshold image
    foreground = (data > args.t)  # .astype(np.uint8)

    # Fill holes
    # This is done slice-by-slice for now since there could be imaging problems where
    # a part of a ventricle is actually in the image at z = 0 or z = -1
    output = np.empty(foreground.shape, dtype=np.uint8)
    for i, img in enumerate(foreground):
        output[i] = binary_fill_holes(img)
    output *= 255

    # Save the result to TIFF
    io.imsave(args.output, output, compress=3)
    verbose_print(args, f'Segmentation written to {args.output}')

    verbose_print(args, f'Foreground segmentation done!')
def proximity_main(args):
    verbose_print(args, f'Calculating proximity to each cell-type for {args.centroids}')

    # Load centroids and cell-type labels
    centroids = np.load(args.centroids)
    celltypes = np.load(args.celltypes)

    # Check for any mismatch
    if args.r is None:
        radius = np.ones(celltypes.shape[-1])
        verbose_print(args, f'No reference radii specified... just using ones')
    else:
        radius = tuple(args.r)
        verbose_print(args, f'Using {radius} reference radii')
        if len(radius) != celltypes.shape[-1]:
            raise ValueError('The number of reference radii must match the number of provided cell-types')

    # May want to add subsampling here...

    # Calculate proximity to each cell-type
    proximities = proximity(centroids, celltypes, args.k, radius)

    # Show plot
    if args.plot:
        idx = np.arange(len(proximities))
        np.random.shuffle(idx)
        idx = idx[:100000]
        plt.plot(proximities[idx, 0], proximities[idx, 1], '.', alpha=0.01)
        plt.show()

    # Save the proximities
    np.save(args.output, proximities)
    verbose_print(args, f'Proximities saved to {args.output}')

    verbose_print(args, f'Calculating proximities done!')
def gate_main(args):
    verbose_print(args, f'Gating cells based on fluorescence in {args.input}')

    # Load MFIs and check for mismatch
    mfis = np.load(args.input)
    if mfis.shape[-1] != len(args.thresholds):
        raise ValueError('Number of thresholds must match the number of channels in MFI array')

    # Show plot
    if args.plot:
        verbose_print(args, f'Showing cytometry plot...')
        mfi_x, mfi_y = mfis[:, args.x], mfis[:, args.y]
        if args.r is None:
            x_max = mfi_x.max()
            y_max = mfi_y.max()
        else:
            x_max = args.r[0]
            y_max = args.r[1]
        plt.hist2d(mfi_x, mfi_y, bins=args.b, norm=colors.PowerNorm(0.25), range=((0, x_max), (0, y_max)))
        plt.plot([args.thresholds[0], args.thresholds[0]], [0, y_max], 'r-')
        plt.plot([0, x_max], [args.thresholds[1], args.thresholds[1]], 'r-')
        plt.xlim([0, x_max])
        plt.ylim([0, y_max])
        plt.xlabel(f'MFI column {args.x}')
        plt.ylabel(f'MFI column {args.y}')
        plt.show()

    # Gate each channel
    labels = np.asarray([threshold_mfi(mfi, t) for mfi, t in zip(mfis.T, args.thresholds)], dtype=np.uint8).T
    # TODO: Add DN labels in here

    # Save the result
    np.save(args.output, labels)
    verbose_print(args, f'Gating results written to {args.output}')

    verbose_print(args, f'Gating cells done!')
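# A minimal sketch of the `threshold_mfi` helper called in gate_main, assuming it simply
# binarizes one MFI channel against its threshold; the actual helper may differ.
def threshold_mfi(mfi, threshold):
    return (mfi > threshold).astype(np.uint8)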
def features_main(args):
    verbose_print(args, f'Calculating multiscale features')

    # Identify all datasets to be analyzed
    if os.path.isdir(args.input):
        input_folders = [os.path.basename(os.path.abspath(args.input))]
    elif os.path.splitext(os.path.abspath(args.input))[1] == '.csv':
        analysis = pd.read_csv(os.path.abspath(args.input), index_col=0)
        parent_dir = os.path.abspath(os.path.join(os.path.abspath(args.input), os.pardir))
        input_folders = [os.path.join(parent_dir, t, f) for t, f in zip(analysis['type'], analysis.index)]
    else:
        raise ValueError('Input must be a folder with a symlinked dataset or an analysis CSV file')

    # Analyze each dataset
    for input_folder in input_folders:
        verbose_print(args, f'Calculating multiscale features for {os.path.basename(input_folder)}')

        # Inject current folder path into command-line arguments
        args.input = os.path.abspath(input_folder)

        # Create a dictionary for holding all features
        features = {'dataset': os.path.basename(args.input)}

        # Load all single-cell data
        verbose_print(args, f'Loading input single cell measurements')
        gate_labels = np.load(os.path.join(args.input, 'dataset/nuclei_gating.npy'))
        nuclei_morphologies = pd.read_csv(os.path.join(args.input, 'dataset/nuclei_morphologies.csv'))
        niche_proximities = np.load(os.path.join(args.input, 'dataset/niche_proximities.npy'))
        niche_labels = np.load(os.path.join(args.input, 'dataset/niche_labels.npy'))

        # Add in double negatives
        # TODO: Move this to nuclei module
        negatives = np.logical_and(gate_labels[:, 0] == 0, gate_labels[:, 1] == 0)
        gate_labels = np.hstack([gate_labels, negatives[:, np.newaxis]])

        # Calculate multiscale features
        features = singlecell_features(args, features, gate_labels, niche_labels, nuclei_morphologies, niche_proximities)
        features = cytoarchitecture_features(args, features)
        features = wholeorg_features(args, features, gate_labels, niche_labels)

        # Save results
        df = pd.Series(features)
        df.to_excel(os.path.join(args.input, 'organoid_features.xlsx'))

    verbose_print(args, f'Multiscale features done!')
def old_preprocessing_main(args):
    if args.t is None and args.s is None and args.k is None:
        raise ValueError('No preprocessing tasks were specified')

    verbose_print(args, f"Preprocessing {args.input}")

    if os.path.isdir(args.input):
        # Load series of 2D TIFFs and process in parallel
        paths, filenames = tifs_in_dir(args.input)
        img = io.imread(paths[0])
        shape = (len(paths), *img.shape)
        if args.float:
            dtype = 'float32'
        else:
            dtype = img.dtype
        arr = io.new_zarr(args.zarr, shape=shape, dtype=dtype, chunks=tuple(args.c))
        args_list = []
        for i, (path, _) in enumerate(zip(paths, filenames)):
            args_list.append((args, path, arr, i))
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            list(tqdm.tqdm(pool.imap_unordered(_preprocess_image2d, args_list), total=len(args_list)))
        if args.p is not None:
            before = io.imread(paths[args.p])
            after = arr[args.p]
    elif os.path.isfile(args.input):
        # Load 3D TIFF and process in memory
        img = io.imread(args.input)

        # Keep reference to before image if plotting
        if args.p is not None:
            before = np.copy(img[args.p])

        verbose_print(args, f"Loaded image: {img.shape} {img.dtype}")
        img = preprocess_image3d(args, img)

        if args.p is not None:
            after = np.copy(img[args.p])
    else:
        raise ValueError('Input is not a valid directory or file')

    # Show A/B plot
    if args.p is not None:
        plt.subplot(121)
        plt.imshow(before)
        plt.title('Before')
        plt.subplot(122)
        plt.imshow(after)
        plt.title('After')
        plt.show()

    verbose_print(args, f"Preprocessing done!")
def combine_main(args):
    verbose_print(args, f'Combining features from {len(args.inputs)} arrays')
    input_arrays = [np.load(path) for path in args.inputs]
    combined = np.concatenate(input_arrays, axis=args.a)
    verbose_print(args, f'Saving combined features to {args.output} with shape {combined.shape}')
    np.save(args.output, combined)

    verbose_print(args, f'Saving organoid labels to {args.sample}')
    names = np.concatenate([i * np.ones(len(arr)) for i, arr in enumerate(input_arrays)])
    np.save(args.sample, names)

    verbose_print(args, f'Combining features done!')
def histogram_main(args):
    # Find all TIFFs
    paths, _ = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Estimate histogram
    sample_paths = downsample_paths(paths, step=args.s)
    verbose_print(args, f"Calculating histogram from {len(sample_paths)} images:")
    hist, bin_centers = estimate_histogram(sample_paths)

    # Show plot
    if args.plot:
        plt.plot(bin_centers, hist)
        plt.show()

    # Build CSV
    df = pd.DataFrame({'intensity': bin_centers, 'count': hist})
    df.to_csv(args.output, index=False)
    verbose_print(args, f"Histogram saved to {args.output}")

    verbose_print(args, f"Histogram done!")
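# A minimal sketch of the `downsample_paths` helper used in histogram_main, assuming it
# just keeps every `step`-th path so the histogram is estimated from a subset of slices;
# the real helper may sample differently.
def downsample_paths(paths, step=10):
    return paths[::step]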
def select_main(args):
    verbose_print(args, f'Selecting datasets for analysis')

    # Load dataset CSV and select datasets by group
    df = pd.read_csv(args.input, index_col=0)
    groups = [df.where(df['type'] == g).dropna() for g in args.groups]
    for g, name in zip(groups, args.groups):
        verbose_print(args, f'Found {len(g)} datasets in group {name}')

    # Create output CSV
    df2 = pd.concat(groups)
    df2.to_csv(args.output)

    verbose_print(args, f'Done selecting datasets!')
def stack_main(args):
    verbose_print(args, f'Stacking images in {args.input}')
    paths, filenames = utils.tifs_in_dir(args.input)
    verbose_print(args, f'Found {len(paths)} images')

    img0 = io.imread(paths[0])
    shape2d, dtype = img0.shape, img0.dtype

    img = np.empty((len(paths), *shape2d), dtype)
    for z, path in tqdm(enumerate(paths), total=len(paths)):
        img[z] = io.imread(path)

    io.imsave(args.output, img, compress=1)
    verbose_print(args, f'Stacking done!')
def setup_main(args):
    verbose_print(args, f'Setting up analysis folder')

    # Load the CSV as a dataframe
    df = pd.read_csv(args.input, index_col=0)

    # Create folders for each group
    groups = list(set(df['type']))
    groups.sort()
    for group in groups:
        verbose_print(args, f'Making directory for {group} group')
        os.makedirs(os.path.join(args.output, group), exist_ok=True)

    # Create folders for each dataset with symlinks to underlying data
    for path in df.index:
        group = df['type'].loc[path]
        new_dir = os.path.join(args.output, group, path)
        verbose_print(args, f'Making directory and symlink for {path}')
        os.makedirs(new_dir, exist_ok=True)
        os.symlink(os.path.join(os.path.abspath(args.datasets), path),
                   os.path.join(os.path.abspath(new_dir), 'dataset'))

    verbose_print(args, f'Done setting up analysis folder!')
def cluster_main(args):
    # This is OLD... See the "determine cyto clusters" notebook
    verbose_print(args, f'Clustering profiles from {args.input}')

    # Load profiles
    profiles = np.load(args.input)

    # Convert to features
    features = profiles_to_features(profiles)

    # Cluster
    kmeans = KMeans(n_clusters=args.n, random_state=0, n_init=10).fit(features)
    labels = kmeans.labels_

    # x_tsne = TSNE(n_components=2, n_jobs=-1, perplexity=500).fit_transform(features)
    x_tsne = UMAP().fit_transform(features)

    if args.plot:
        for i in range(args.n):
            idx = np.where(labels == i)[0]
            plt.plot(x_tsne[idx, 0], x_tsne[idx, 1], '.', alpha=1.0, markersize=3)
        plt.show()

    # Save the labels
    np.save(args.labels, labels)
    np.save(args.tsne, x_tsne)
    verbose_print(args, f'Labels saved to {args.labels}')
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')

    # TODO: Save trained clustering model for classifying new samples (either KMeans or GaussianMixture)

    verbose_print(args, 'Clustering profiles done!')
def convert_main(args):
    nb_workers = _check_workers(args)
    verbose_print(args, f"Converting {args.input} to Zarr")

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Batch the paths by chunk depth so each worker writes whole chunks in z
    paths_chunked = [paths[pos:pos + args.c[0]] for pos in range(0, len(paths), args.c[0])]

    img = io.imread(paths[0])
    shape = (len(paths), *img.shape)
    dtype = img.dtype
    chunks = tuple(args.c)

    arr = io.new_zarr(args.output, shape=shape, dtype=dtype, chunks=chunks)
    verbose_print(args, f"Writing to {args.output}")

    args_list = []
    for i, paths_batch in enumerate(paths_chunked):
        args_list.append((paths_batch, i, chunks[0], arr))
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(_convert_batch, args_list), total=len(args_list)))

    verbose_print(args, f"Conversion done!")
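# A plausible sketch of the `_convert_batch` worker used above, inferred from the tuple
# it receives (paths_batch, batch_index, z_chunk, arr): read one z-batch of TIFFs and
# write it into the Zarr array at the corresponding z offset. The real worker may differ.
def _convert_batch(inputs):
    paths_batch, i, z_chunk, arr = inputs
    z_start = i * z_chunk
    stack = np.asarray([io.imread(path) for path in paths_batch])
    arr[z_start:z_start + stack.shape[0]] = stack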
def contrast_main(args):
    # Initial setup
    nb_workers = _check_workers(args)
    if args.k is None:
        verbose_print(args, f"Performing histogram equalization with default kernel size")
        kernel_size = None
    else:
        verbose_print(args, f"Performing histogram equalization with kernel size {args.k}")
        kernel_size = args.k

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Make output folder
    os.makedirs(args.output, exist_ok=True)

    for path, filename in tqdm.tqdm(zip(paths, filenames), total=len(paths)):
        img = io.imread(path)
        adjusted = equalize_adapthist(img, kernel_size=kernel_size).astype(np.float32)
        io.imsave(os.path.join(args.output, filename), adjusted, compress=args.c)

    verbose_print(args, f"Contrast done!")
def denoise_main(args):
    # Initial setup
    nb_workers = _check_workers(args)
    os.makedirs(args.output, exist_ok=True)

    # Find all TIFFs
    paths, _ = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Curry denoising function for pmap
    f = partial(denoise2d, sigma=args.s, wavelet=args.w)
    g = partial(read_process_write, f=f, output=args.output, compress=args.c)

    # Parallel read, denoise, write
    verbose_print(args, f"Denoising with {nb_workers} workers:")
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(g, paths), total=len(paths)))

    verbose_print(args, f"Denoising done!")
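# A minimal sketch of the `read_process_write` helper curried above, assuming it reads a
# TIFF, applies the processing function, and writes the result to the output folder under
# the same filename; the real helper in this codebase may handle dtypes differently.
def read_process_write(path, f, output, compress=1):
    img = io.imread(path)
    result = f(img)
    io.imsave(os.path.join(output, os.path.basename(path)), result, compress=compress)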
def downsample_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    verbose_print(args, f'Downsampling {args.input} with factors {args.factor}')

    if args.tiff:
        os.makedirs(args.output, exist_ok=True)
        paths, filenames = utils.tifs_in_dir(args.input)
        args_list = []
        for path, filename in zip(paths, filenames):
            args_list.append((path, args.factor, args.output, filename))
        with multiprocessing.Pool(nb_workers) as pool:
            pool.starmap(read_downsample_write, args_list)
        # for i, (path, filename) in enumerate(zip(paths, filenames)):
        #     verbose_print(args, f'Downsampling {filename}')
        #     arr = io.imread(path)
        #     if isinstance(args.factor, int):
        #         factors = tuple(args.factor for _ in range(arr.ndim))
        #     else:
        #         factors = tuple(args.factor)
        #     data = downsample(arr, factors)
        #     output = os.path.join(args.output, filename)
        #     io.imsave(output, data, compress=3)
    else:
        arr = io.open(args.input, mode='r')
        if isinstance(args.factor, int):
            factors = tuple(args.factor for _ in range(arr.ndim))
        else:
            factors = tuple(args.factor)
        data = downsample(arr, factors)
        verbose_print(args, f'Writing result to {args.output}')
        io.imsave(args.output, data, compress=3)

    verbose_print(args, f'Downsampling done!')
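# A minimal sketch of the `read_downsample_write` worker used above. It mirrors the
# commented-out serial loop in downsample_main (read, expand the factor per axis,
# downsample, save); the actual worker may differ in details such as compression level.
def read_downsample_write(path, factor, output, filename):
    arr = io.imread(path)
    if isinstance(factor, int):
        factors = tuple(factor for _ in range(arr.ndim))
    else:
        factors = tuple(factor)
    data = downsample(arr, factors)
    io.imsave(os.path.join(output, filename), data, compress=3)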
def mesh_main(args):
    if args.g is not None:
        if len(args.g) == 1:
            sigma = args.g[0]
        else:
            sigma = tuple(args.g)
    if args.d is None:
        downsample_factor = 1
    else:
        downsample_factor = np.asarray(args.d)

    verbose_print(args, f'Meshing segmentation at {args.input}')

    # Calculate the downsampled voxel size
    voxel_orig = read_voxel_size(args.voxel_size)
    voxel_down = tuple(voxel_orig * downsample_factor)
    verbose_print(args, f'Original voxel size (um): {voxel_orig}')
    verbose_print(args, f'Downsampled voxel size (um): {voxel_down}')

    # Load segmentation
    seg = io.imread(args.input)

    # Smooth segmentation
    if args.g is not None:
        seg = smooth_segmentation(seg, sigma)
        verbose_print(args, f'Smoothed segmentation with sigma {sigma}')

    # Calculate mesh surface
    verts, faces, normals, values = marching_cubes(seg, args.l, voxel_down, args.s)
    mesh = {'verts': verts, 'faces': faces, 'normals': normals, 'values': values}
    verbose_print(args, f'Computed mesh with {len(normals)} normals')

    # Plot mesh
    if args.plot:
        plot_mesh(mesh['verts'], mesh['faces'])
        mlab.show()

    # Save mesh
    save_mesh(args.output, mesh)
    verbose_print(args, f'Mesh saved to {args.output}')

    verbose_print(args, 'Meshing done!')
def ventricle_main(args):
    verbose_print(args, f'Segmenting ventricles in {args.input}')

    # Load the input image
    data = io.imread(args.input)

    # Load the model
    if args.model.endswith('.pt'):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # device = torch.device("cpu")
        model = load_model(args.model, device)
        model = model.eval()
        verbose_print(args, f'PyTorch model successfully loaded from {args.model} to {device} device')

        # Segment the input image
        verbose_print(args, f'Segmentation progress:')
        output = segment_ventricles(model, data, args.t, device)
    elif args.model.endswith('.h5'):
        model = load_keras_model(args.model)
        verbose_print(args, f'Keras model successfully loaded from {args.model}')

        # Segment the input image
        verbose_print(args, f'Segmentation progress:')
        output = segment_ventricles_keras(model, data, args.t)

    # Remove border regions
    if args.exclude_border:
        verbose_print(args, f'Removing regions connected to image border')
        # This could also be done in 3D instead of slice-by-slice
        # I'm not sure if images will start in ventricle, so doing slice-by-slice to be safe
        img = np.zeros_like(output)
        for i, data in tqdm(enumerate(output), total=len(output)):
            img[i] = clear_border(data)
        output = img

    # Save the result to TIFF
    io.imsave(args.output, output, compress=3)
    verbose_print(args, f'Segmentation written to {args.output}')

    verbose_print(args, f'Ventricle segmentation done!')
def fluorescence_main(args):
    if isinstance(args.inputs, list):
        inputs = args.inputs
    else:
        inputs = [args.inputs]
    nb_images = len(inputs)
    verbose_print(args, f'Passed {nb_images} images to measure fluorescence')

    # Load centroids
    centroids = np.load(args.centroids)

    # Initialize output arrays
    mfis = np.zeros((centroids.shape[0], nb_images))
    stdevs = np.zeros((centroids.shape[0], nb_images))

    for i, path in enumerate(inputs):
        # Open image
        arr = io.open(path, mode='r')
        shape, dtype, chunks = arr.shape, arr.dtype, arr.chunks
        verbose_print(args, f'Sampling from {path}: {shape} {dtype}')

        # Sample image
        if args.g is not None:
            # Perform smoothing in a temporary array
            verbose_print(args, f'Smoothing {path} with sigma {tuple(args.g)}')
            with tempfile.TemporaryDirectory(prefix=os.path.abspath('.')) as temp_path:
                smoothed_arr = io.new_zarr(temp_path, shape, chunks, dtype)
                gaussian_blur_parallel(arr, args.g, smoothed_arr, arr.chunks, args.o, args.w)  # Too many workers gives Zarr race condition
                verbose_print(args, f'Sampling fluorescence from smoothed {path}')
                intensities = nuclei_centered_intensities(smoothed_arr, centroids, args.r, mode=args.m, nb_workers=args.w)
            # Temporary array deleted when context ends
        else:
            intensities = nuclei_centered_intensities(arr, centroids, args.r, mode=args.m, nb_workers=args.w)

        # Compute statistics
        mfis[:, i] = calculate_mfi(intensities)
        stdevs[:, i] = calculate_stdev(intensities)

    # Make output folder
    os.makedirs(args.output, exist_ok=True)

    # Save numpy array of MFIs and stdevs
    mfi_path = os.path.join(args.output, 'nuclei_mfis.npy')
    np.save(mfi_path, mfis)
    verbose_print(args, f'MFIs written to {mfi_path}')
    stdev_path = os.path.join(args.output, 'nuclei_stdevs.npy')
    np.save(stdev_path, stdevs)
    verbose_print(args, f'StDevs written to {stdev_path}')

    # Save CSV containing fluorescence statistics for each detected centroid
    # sox2.zarr/ <-- forward slash makes os.path.basename eval to empty string
    # Can use os.path.dirname(path) to get sox2.zarr, then use basename on that
    basenames = [os.path.basename(os.path.dirname(path)).split('.')[0] for path in inputs]
    csv_names = ['fluorescence_' + str(base) + '.csv' for base in basenames]
    csv_paths = [os.path.join(args.output, name) for name in csv_names]
    for i, (base, path) in enumerate(zip(basenames, csv_paths)):
        df = pd.DataFrame({'mfi': mfis[:, i], 'stdev': stdevs[:, i]})
        df.to_csv(path)
        verbose_print(args, f'Fluorescence statistics for {base} written to {path}')

    verbose_print(args, f'Fluorescence measurements done!')
def morphology_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    if args.segmentations is not None:
        return_seg = True
    else:
        return_seg = False

    verbose_print(args, f'Computing morphological features for {args.input}')

    # Get window size
    window_size = np.asarray(args.w)
    verbose_print(args, f'Using window size of {window_size} around each cell')

    # Load the detected centroids and open binary segmentation
    centroids = np.load(args.centroids)  # TODO: Make this consider voxel dimensions
    binary_seg = io.open(args.input, mode='r')

    # Compute labeled segmentation and morphologies for each cell
    if return_seg:
        verbose_print(args, f'Computing segmentations and morphologies with {nb_workers} workers')
    else:
        verbose_print(args, f'Computing morphologies with {nb_workers} workers')
    args_list = [(centroid, window_size, binary_seg, return_seg) for centroid in centroids]
    with multiprocessing.Pool(nb_workers) as pool:
        results = list(tqdm(pool.imap(_segment_centroid, args_list), total=len(args_list)))

    # Unpack morphological features
    # features = np.array([center, volume, eq_diam, minor_length, major_length, axis_ratio])
    features = np.asarray([r[0] for r in results])  # N x feats
    centers_z = features[:, 0]
    centers_y = features[:, 1]
    centers_x = features[:, 2]
    volumes = features[:, 3]
    eq_diams = features[:, 4]
    minor_lengths = features[:, 5]
    major_lengths = features[:, 6]
    axis_ratios = features[:, 7]

    # Save each segmentation
    if return_seg:
        verbose_print(args, f'Saving single-cell segmentations to {args.segmentations}')
        singles = np.asarray([r[1] for r in results])
        np.savez_compressed(args.segmentations, singles)

    # Save CSV containing morphologies for each detected centroid
    data = {
        'com_z': centers_z,
        'com_y': centers_y,
        'com_x': centers_x,
        'volume': volumes,
        'eq_diam': eq_diams,
        'minor_length': minor_lengths,
        'major_length': major_lengths,
        'axis_ratio': axis_ratios,
    }
    df = pd.DataFrame(data)
    df.to_csv(args.output)
    verbose_print(args, f'Morphological features written to {args.output}')

    verbose_print(args, f'Computing morphologies done!')
def classify_main(args):
    verbose_print(args, f'Training KNN model based on {args.profiles_train} and {args.labels_train}')

    # Load training data
    profiles_train = np.load(args.profiles_train)
    x_train = profiles_to_features(profiles_train, normalize=False)  # Normalizes the data (should we do this?)
    if args.umap is not None:
        model = joblib.load(args.umap)
        x_train = model.transform(x_train)
    y_train = np.load(args.labels_train)
    classes = np.unique(y_train)

    if args.load is None:
        verbose_print(args, f'Training new model')
        # Train model
        # Logistic regression model
        # clf = LogisticRegression(random_state=0,
        #                          solver='lbfgs',
        #                          multi_class='multinomial',
        #                          max_iter=200,
        #                          n_jobs=-1).fit(x_train, y_train)
        # KNN classifier
        clf = KNeighborsClassifier(n_neighbors=1)
        clf.fit(x_train, y_train)
        verbose_print(args, f'Training accuracy: {clf.score(x_train, y_train):.4f}')
    else:
        verbose_print(args, f'Loading model from {args.load}')
        clf = joblib.load(args.load)

    if args.save is not None:
        verbose_print(args, f'Saving model to {args.save}')
        joblib.dump(clf, args.save)

    # Apply classifier
    profiles = np.load(args.profiles)
    x = profiles_to_features(profiles, normalize=False)
    if args.umap is not None:
        x = model.transform(x)
    labels = clf.predict(x)

    nb_cells = len(profiles)
    verbose_print(args, f'Classified {nb_cells} profiles into {len(classes)} cytoarchitecture classes')
    for c in classes:
        count = len(np.where(labels == c)[0])
        verbose_print(args, f'Class {c}: {count:10d} profiles {100 * count / nb_cells:10.3f}%')

    # Save the niche labels
    np.save(args.labels, labels)
    verbose_print(args, f'Labels saved to {args.labels}')

    verbose_print(args, f'Classifying done!')
def preprocess_image3d(args, img):
    # Background removal
    if args.t is not None:
        verbose_print(args, f"Performing background removal with threshold {args.t}")
        img = remove_background(img, args.t)

    # Histogram equalization
    if args.k is not None:
        if args.k == 0:
            verbose_print(args, f"Performing histogram equalization with default kernel size")
            kernel_size = None
        else:
            verbose_print(args, f"Performing histogram equalization with kernel size {args.k}")
            kernel_size = args.k
        img = clahe(img, kernel_size=kernel_size)

    # Normalize and convert to float
    if args.float:
        img = rescale_intensity(img_as_float32(img))
        verbose_print(args, f"Converted to normalized float32: min {img.min():.3f}, max {img.max():.3f}")

    # Denoising
    if args.s is not None:
        verbose_print(args, f"Performing noise removal with sigma {args.s} and wavelet {args.w}")
        img = denoise(img, args.s, args.w)

    # Convert to Zarr
    verbose_print(args, f"Saving result to {args.zarr}")
    arr = io.new_zarr(args.zarr, shape=img.shape, dtype=img.dtype, chunks=tuple(args.c))
    arr[:] = img

    return img