Example No. 1
def sample_main(args):
    if isinstance(args.inputs, list):
        inputs = args.inputs
    else:
        inputs = [args.inputs]
    if isinstance(args.outputs, list):
        outputs = args.outputs
    else:
        outputs = [args.outputs]
    if len(inputs) != len(outputs):
        raise ValueError("Number of inputs and outputs must match")

    verbose_print(args,
                  f'Taking {args.samples} random samples from {inputs}')

    np.random.seed(args.seed)
    verbose_print(args, f'Random seed set to {args.seed}')

    # Load arrays
    input_arrs = [np.load(path) for path in inputs]

    # Randomly sample
    sampled_data, idx = randomly_sample(args.samples,
                                        *input_arrs,
                                        return_idx=True)

    # Save sample
    for output, samples in zip(outputs, sampled_data):
        np.save(output, samples)
        verbose_print(args, f'Saved samples to {output}')
    np.save(args.index, idx)
    verbose_print(args, f'Saved sample index to {args.index}')

    verbose_print(args, 'Random sampling done!')
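
randomly_sample is a project helper that is not shown in these examples. A minimal sketch consistent with the call above (the same n row indices drawn across every input array, with an optional index return); the real implementation may differ:

import numpy as np

def randomly_sample(n, *arrays, return_idx=False):
    # Draw n row indices once and apply them to every array so that
    # corresponding rows stay aligned across the sampled outputs.
    idx = np.random.choice(len(arrays[0]), size=n, replace=False)
    samples = [arr[idx] for arr in arrays]
    if return_idx:
        return samples, idx
    return samples
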
Example No. 2
def _check_input(args):
    if os.path.isdir(args.input):
        verbose_print(args, f"Preprocessing 2D TIFFs in {args.input}")
        is_folder = True
    elif os.path.isfile(args.input):
        verbose_print(args, f"Preprocessing 3D TIFF {args.input}")
        is_folder = False
    else:
        raise ValueError('Input is not a valid directory or file')
    return is_folder
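
verbose_print appears throughout these examples but is not defined in any of them; presumably it only prints when a verbose flag is set. A minimal sketch, assuming the parsed arguments carry a verbose attribute:

def verbose_print(args, message):
    # Print only when the user requested verbose output (e.g. via --verbose).
    if getattr(args, 'verbose', False):
        print(message)
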
Example No. 3
def tsne_main(args):
    verbose_print(args, f'Loading niche labels from {args.labels}')
    labels = np.load(args.labels)

    verbose_print(args, f'Running t-SNE based on {args.proximity}')
    proximities = np.load(args.proximity)

    x_tsne = TSNE(n_components=2, n_jobs=-1, perplexity=800,
                  learning_rate=100).fit_transform(proximities)

    if args.plot:
        # Show tSNE
        for i in range(4):
            idx = np.where(labels == i)[0]
            if len(idx) == 0:
                continue
            plt.plot(x_tsne[idx, 0], x_tsne[idx, 1], '.', label=f'Cluster {i}')
        plt.legend()
        plt.show()

    # Save the t-SNE coordinates
    np.save(args.tsne, x_tsne)
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')

    verbose_print(args, f'Niche clustering done!')
Example No. 4
def radial_main(args):
    verbose_print(args, f'Calculating radial profiles for {args.centroids}')

    # Load centroids and cell-type labels
    centroids = np.load(args.centroids)
    celltypes = np.load(args.celltypes)

    # May want to add subsampling here...

    # Find neighbors within a given radius
    nbrs = fit_neighbors(centroids)
    distances, indices = query_radius(nbrs, centroids, args.r)

    # Compute profiles for each cell-type

    profiles = np.zeros((celltypes.shape[-1], celltypes.shape[0], args.b))
    for i, labels in enumerate(celltypes.T):
        verbose_print(args, f'Counting cell-type {i}')
        profiles[i] = radial_profile(centroids, distances, indices, args.r,
                                     args.b, labels)

    # Save results
    np.save(args.output, profiles)
    verbose_print(args, f'Radial profiles saved to {args.output}')

    verbose_print(args, f'Calculating radial profiles done!')
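
fit_neighbors and query_radius are project helpers. Assuming they wrap scikit-learn's NearestNeighbors, a sketch consistent with the calls above could be:

from sklearn.neighbors import NearestNeighbors

def fit_neighbors(points):
    # Build a spatial index over the centroid coordinates.
    return NearestNeighbors(algorithm='kd_tree').fit(points)

def query_radius(nbrs, points, radius):
    # For each query point, return the distances and indices of all
    # fitted points within the given radius.
    return nbrs.radius_neighbors(points, radius=radius)
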
Example No. 5
def profiles_main(args):
    verbose_print(args, f'Calculating profiles from {args.mesh}')

    # Get vertices and normals
    mesh = load_mesh(args.mesh)
    verts = mesh['verts']
    normals = mesh['normals']

    # Load centers and labels
    centroids_um = np.load(args.centroids)
    labels = np.load(args.labels)

    sox2_labels = labels[:, 0]
    tbr1_labels = labels[:, 1]

    # Plot mesh
    if args.plot:
        plot_mesh(mesh['verts'], mesh['faces'])
        plot_nuclei(centroids_um,
                    10000,
                    sox2_labels,
                    tbr1_labels,
                    scale_factor=8)
        mlab.show()

    # Calculate profiles
    verbose_print(args, f'Progress:')
    profiles = compute_profiles(verts, normals, args.l, args.b, args.r,
                                centroids_um, sox2_labels, tbr1_labels)

    # Save the profiles
    np.save(args.output, profiles)
    verbose_print(args, f'Profiles saved to {args.output}')

    verbose_print(args, 'Calculating profiles done!')
Example No. 6
def combine_main(args):
    verbose_print(args, f'Combining profiles based on {args.input}')

    # Get full paths for sampled profiles from analysis CSV
    parent_dir = os.path.abspath(os.path.join(args.input, os.pardir))
    df = pd.read_csv(args.input, index_col=0)
    paths = [
        os.path.join(parent_dir, df.loc[folder]['type'], folder, 'dataset',
                     args.name) for folder in df.index
    ]

    # Adapted from niche.combine_main
    input_arrays = [np.load(path) for path in paths]
    combined = np.concatenate(input_arrays, axis=args.a)

    verbose_print(
        args,
        f'Saving combined features to {args.output} with shape {combined.shape}'
    )
    np.save(args.output, combined)

    verbose_print(args, f'Saving organoid labels to {args.sample}')
    names = np.concatenate(
        [i * np.ones(len(arr)) for i, arr in enumerate(input_arrays)])
    np.save(args.sample, names)

    verbose_print(args, f'Combining profiles done!')
Example No. 7
def combine_main(args):
    verbose_print(args, f'Combining multiscale features')

    # Identify all datasets to be analyzed if an analysis CSV was passed
    if os.path.splitext(args.inputs[0])[1] == '.csv':
        analysis = pd.read_csv(args.inputs[0], index_col=0)
        parent_dir = os.path.abspath(os.path.join(os.path.abspath(args.inputs[0]), os.pardir))
        args.inputs = [os.path.join(parent_dir, t, f) for t, f in zip(analysis['type'], analysis.index)]

    dfs = []
    for organoid in args.inputs:
        path = os.path.join(organoid, 'organoid_features.xlsx')
        dfs.append(pd.read_excel(path, index_col=0, skiprows=1))
    df = pd.concat(dfs, axis=1, sort=False)
    df.to_excel(args.output)

    verbose_print(args, f'Combining multiscale features done!')
Example No. 8
def segment_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    # Open probability map Zarr array
    verbose_print(args, f'Segmenting nuclei in {args.input}')
    prob_arr = io.open(args.input, mode='r')
    shape, dtype, chunks = prob_arr.shape, prob_arr.dtype, prob_arr.chunks
    verbose_print(args, f'Opened image: {shape} {dtype}')
    if dtype != 'float32':
        warnings.warn(
            'Input dtype is not float32... may not have passed a probability map'
        )

    # Load nuclei centroids
    centroids = np.load(args.centroids)

    # Create foreground mask by thresholding the probability map
    verbose_print(
        args,
        f'Thresholding probability at {args.t}, writing foreground to {args.foreground}'
    )
    foreground_arr = io.new_zarr(args.foreground,
                                 shape=shape,
                                 chunks=chunks,
                                 dtype='uint8')
    f = partial(_threshold_chunk, threshold=args.t, output=foreground_arr)
    utils.pmap_chunks(f, prob_arr, chunks, 1, use_imap=True)

    # Add watershed lines to the foreground mask to break up touching nuclei
    verbose_print(
        args,
        f'Performing watershed, writing binary segmentation to {args.output}')
    binary_seg = io.new_zarr(args.output, shape, chunks, 'uint8')
    watershed_centers_parallel(prob_arr,
                               centers=centroids,
                               mask=foreground_arr,
                               output=binary_seg,
                               chunks=chunks,
                               overlap=args.o,
                               nb_workers=nb_workers)

    verbose_print(args, 'Nuclei segmentation done!')
Example No. 9
def rescale_main(args):
    nb_workers = _check_workers(args)

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Load histogram and compute percentile from CDF
    df = pd.read_csv(args.histogram)
    bins = df['intensity'].to_numpy()
    counts = df['count'].to_numpy()
    total = counts.sum()
    cdf = np.cumsum(counts)
    target = total * (args.p / 100)
    abs_diff = np.abs(cdf - target)
    idx = np.where(abs_diff == abs_diff.min())[0]
    max_val = bins[idx][0]
    # min_val, max_val = bins[0], bins[-1]

    # Make the output folder
    os.makedirs(args.output, exist_ok=True)

    # Rescale images in parallel
    verbose_print(args, f"Rescaling images with {nb_workers} workers:")
    args_list = []
    for path, filename in zip(paths, filenames):
        args_list.append((path, args.t, max_val, args.output, filename, args.c))
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(_rescale_image, args_list), total=len(paths)))

    verbose_print(args, f"Rescaling done!")
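
_rescale_image is the per-image worker dispatched to the pool; it is not shown here. A hypothetical sketch, assuming args.t is a lower threshold, max_val the percentile-derived ceiling, and io the project's image I/O module used above:

def _rescale_image(arg):
    # Hypothetical worker: clip each image to [t, max_val], rescale to [0, 1],
    # and write the result under the same filename in the output folder.
    path, t, max_val, output_dir, filename, compress = arg
    img = io.imread(path).astype(np.float32)
    img = np.clip(img, t, max_val)
    img = (img - t) / (max_val - t)
    io.imsave(os.path.join(output_dir, filename), img, compress=compress)
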
Example No. 10
def foreground_main(args):
    verbose_print(args, f'Segmenting foreground from {args.input}')

    # Load the input image
    data = io.imread(args.input)

    # Smoothing
    if args.g is not None:
        data = gaussian_blur(data, args.g).astype(data.dtype)

    # Threshold image
    foreground = (data > args.t)  # .astype(np.uint8)

    # Fill holes
    # This is done slice-by-slice for now since there could be imaging problems where
    # a part of a ventricle is actually in the image at z = 0 or z = -1
    output = np.empty(foreground.shape, dtype=np.uint8)
    for i, img in enumerate(foreground):
        output[i] = binary_fill_holes(img)
    output *= 255

    # Save the result to TIFF
    io.imsave(args.output, output, compress=3)
    verbose_print(args, f'Segmentation written to {args.output}')

    verbose_print(args, f'Foreground segmentation done!')
Example No. 11
def proximity_main(args):
    verbose_print(
        args, f'Calculating proximity to each cell-type for {args.centroids}')

    # Load centroids and cell-type labels
    centroids = np.load(args.centroids)
    celltypes = np.load(args.celltypes)

    # Check for any mismatch
    if args.r is None:
        radius = np.ones(celltypes.shape[-1])
        verbose_print(args, f'No reference radii specified... just using ones')
    else:
        radius = tuple(args.r)
        verbose_print(args, f'Using {radius} reference radii')
        if len(radius) != celltypes.shape[-1]:
            raise ValueError(
                'The number of reference radii must match the number of provided cell-types'
            )

    # May want to add subsampling here...

    # Calculate proximity to each cell-type
    proximities = proximity(centroids, celltypes, args.k, radius)

    # Show plot
    if args.plot:
        idx = np.arange(len(proximities))
        np.random.shuffle(idx)
        idx = idx[:100000]
        plt.plot(proximities[idx, 0], proximities[idx, 1], '.', alpha=0.01)
        plt.show()

    # Save the proximities
    np.save(args.output, proximities)
    verbose_print(args, f'Proximities saved to {args.output}')

    verbose_print(args, f'Calculating proximities done!')
Example No. 12
def gate_main(args):
    verbose_print(args, f'Gating cells based on fluorescence in {args.input}')

    # Load MFIs and check for mismatch
    mfis = np.load(args.input)
    if mfis.shape[-1] != len(args.thresholds):
        raise ValueError(
            'Number of thresholds must match the number of channels in MFI array'
        )

    # Show plot
    if args.plot:
        verbose_print(args, f'Showing cytometry plot...')

        mfi_x, mfi_y = mfis[:, args.x], mfis[:, args.y]

        if args.r is None:
            x_max = mfi_x.max()
            y_max = mfi_y.max()
        else:
            x_max = args.r[0]
            y_max = args.r[1]

        plt.hist2d(mfi_x,
                   mfi_y,
                   bins=args.b,
                   norm=colors.PowerNorm(0.25),
                   range=((0, x_max), (0, y_max)))
        plt.plot([args.thresholds[0], args.thresholds[0]], [0, y_max], 'r-')
        plt.plot([0, x_max], [args.thresholds[1], args.thresholds[1]], 'r-')
        plt.xlim([0, x_max])
        plt.ylim([0, y_max])
        plt.xlabel(f'MFI column {args.x}')
        plt.ylabel(f'MFI column {args.y}')
        plt.show()

    # Gate each channel
    labels = np.asarray(
        [threshold_mfi(mfi, t) for mfi, t in zip(mfis.T, args.thresholds)],
        dtype=np.uint8).T
    # TODO: Add DN labels in here

    # Save the result
    np.save(args.output, labels)
    verbose_print(args, f'Gating results written to {args.output}')

    verbose_print(args, f'Gating cells done!')
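
threshold_mfi is not shown; a minimal sketch consistent with the per-channel gating above (positive when the MFI exceeds the channel threshold) would be:

import numpy as np

def threshold_mfi(mfi, threshold):
    # Label each cell 1 (positive) if its MFI exceeds the channel threshold,
    # 0 (negative) otherwise.
    return (np.asarray(mfi) > threshold).astype(np.uint8)
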
Example No. 13
def features_main(args):
    verbose_print(args, f'Calculating multiscale features')

    # Identify all datasets to be analyzed
    if os.path.isdir(args.input):
        input_folders = [os.path.basename(os.path.abspath(args.input))]
    elif os.path.splitext(os.path.abspath(args.input))[1] == '.csv':
        analysis = pd.read_csv(os.path.abspath(args.input), index_col=0)
        parent_dir = os.path.abspath(os.path.join(os.path.abspath(args.input), os.pardir))
        input_folders = [os.path.join(parent_dir, t, f) for t, f in zip(analysis['type'], analysis.index)]
    else:
        raise ValueError('Input must be a folder with a symlinked dataset or an analysis CSV file')

    # Analyze each dataset
    for input_folder in input_folders:
        verbose_print(args, f'Calculating multiscale features for {os.path.basename(input_folder)}')

        # inject current folder path into command line arguments
        args.input = os.path.abspath(input_folder)

        # Create a dictionary for holding all features
        features = {'dataset': os.path.basename(args.input)}

        # Load all single-cell data
        verbose_print(args, f'Loading input single cell measurements')
        gate_labels = np.load(os.path.join(args.input, 'dataset/nuclei_gating.npy'))
        nuclei_morphologies = pd.read_csv(os.path.join(args.input, 'dataset/nuclei_morphologies.csv'))
        niche_proximities = np.load(os.path.join(args.input, 'dataset/niche_proximities.npy'))
        niche_labels = np.load(os.path.join(args.input, 'dataset/niche_labels.npy'))

        # Add in double negatives
        # TODO: Move this to nuclei module
        negatives = np.logical_and(gate_labels[:, 0] == 0, gate_labels[:, 1] == 0)
        gate_labels = np.hstack([gate_labels, negatives[:, np.newaxis]])

        # Calculate multiscale features
        features = singlecell_features(args, features, gate_labels, niche_labels, nuclei_morphologies, niche_proximities)
        features = cytoarchitecture_features(args, features)
        features = wholeorg_features(args, features, gate_labels, niche_labels)

        # Save results
        df = pd.Series(features)
        df.to_excel(os.path.join(args.input, 'organoid_features.xlsx'))

    verbose_print(args, f'Multiscale features done!')
Example No. 14
def old_preprocessing_main(args):
    if args.t is None and args.s is None and args.k is None:
        raise ValueError('No preprocessing tasks were specified')

    verbose_print(args, f"Preprocessing {args.input}")

    if os.path.isdir(args.input):
        # Load series of 2D TIFFs and process in parallel
        paths, filenames = tifs_in_dir(args.input)

        img = io.imread(paths[0])
        shape = (len(paths), *img.shape)
        if args.float:
            dtype = 'float32'
        else:
            dtype = img.dtype

        arr = io.new_zarr(args.zarr, shape=shape, dtype=dtype, chunks=tuple(args.c))

        args_list = []
        for i, (path, _) in enumerate(zip(paths, filenames)):
            args_list.append((args, path, arr, i))

        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            list(tqdm.tqdm(pool.imap_unordered(_preprocess_image2d, args_list), total=len(args_list)))

        if args.p is not None:
            before = io.imread(paths[args.p])
            after = arr[args.p]

    elif os.path.isfile(args.input):
        # Load 3D TIFF and process in memory
        img = io.imread(args.input)
        # Keep reference to before image if plotting
        if args.p is not None:
            before = np.copy(img[args.p])
        verbose_print(args, f"Loaded image: {img.shape} {img.dtype}")
        img = preprocess_image3d(args, img)
        if args.p is not None:
            after = np.copy(img[args.p])

    else:
        raise ValueError('Input is not a valid directory or file')

    # Show A/B plot
    if args.p is not None:
        plt.subplot(121)
        plt.imshow(before)
        plt.title('Before')
        plt.subplot(122)
        plt.imshow(after)
        plt.title('After')
        plt.show()

    verbose_print(args, f"Preprocessing done!")
Example No. 15
def combine_main(args):
    verbose_print(args, f'Combining features from {len(args.inputs)} arrays')

    input_arrays = [np.load(path) for path in args.inputs]
    combined = np.concatenate(input_arrays, axis=args.a)

    verbose_print(
        args,
        f'Saving combined features to {args.output} with shape {combined.shape}'
    )
    np.save(args.output, combined)

    verbose_print(args, f'Saving organoid labels to {args.sample}')
    names = np.concatenate(
        [i * np.ones(len(arr)) for i, arr in enumerate(input_arrays)])
    np.save(args.sample, names)

    verbose_print(args, f'Combining features done!')
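
For clarity, the sample-label construction above assigns every row the index of the array it came from. A small illustration:

import numpy as np

arrays = [np.zeros((2, 4)), np.zeros((3, 4)), np.zeros((1, 4))]
names = np.concatenate([i * np.ones(len(arr)) for i, arr in enumerate(arrays)])
print(names)  # [0. 0. 1. 1. 1. 2.]
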
Example No. 16
def histogram_main(args):
    # Find all TIFFs
    paths, _ = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Estimate histogram
    sample_paths = downsample_paths(paths, step=args.s)
    verbose_print(args, f"Calculating histogram from {len(sample_paths)} images:")
    hist, bin_centers = estimate_histogram(sample_paths)

    # Show plot
    if args.plot:
        plt.plot(bin_centers, hist)
        plt.show()

    # Build CSV
    df = pd.DataFrame({'intensity': bin_centers, 'count': hist})
    df.to_csv(args.output, index=False)
    verbose_print(args, f"Histogram saved to {args.output}")

    verbose_print(args, f"Histogram done!")
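
downsample_paths and estimate_histogram are project helpers. Hypothetical sketches, assuming unsigned 16-bit TIFFs and the project's io module used elsewhere in these examples:

import numpy as np

def downsample_paths(paths, step=1):
    # Keep every step-th path so the histogram is estimated from a subset.
    return paths[::step]

def estimate_histogram(paths, nb_bins=2**16):
    # Accumulate an intensity histogram over the sampled images.
    counts = np.zeros(nb_bins, dtype=np.int64)
    for path in paths:
        img = io.imread(path)
        counts += np.bincount(img.ravel(), minlength=nb_bins)
    return counts, np.arange(nb_bins)
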
Example No. 17
def select_main(args):
    verbose_print(args, f'Selecting datasets for analysis')

    # Load dataset CSV and select datasets by group
    df = pd.read_csv(args.input, index_col=0)
    groups = [df.where(df['type'] == g).dropna() for g in args.groups]
    for g, name in zip(groups, args.groups):
        verbose_print(args, f'Found {len(g)} datasets in group {name}')

    # Create output CSV
    df2 = pd.concat(groups)
    df2.to_csv(args.output)

    verbose_print(args, f'Done selecting datasets!')
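
As a side note, the where(...).dropna() pattern upcasts numeric columns to float; a more direct alternative (which keeps the original CSV row order rather than grouping rows by type) is boolean selection:

df2 = df[df['type'].isin(args.groups)]
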
Example No. 18
def stack_main(args):
    verbose_print(args, f'Stacking images in {args.input}')

    paths, filenames = utils.tifs_in_dir(args.input)
    verbose_print(args, f'Found {len(paths)} images')

    img0 = io.imread(paths[0])
    shape2d, dtype = img0.shape, img0.dtype
    img = np.empty((len(paths), *shape2d), dtype)
    for z, path in tqdm(enumerate(paths), total=len(paths)):
        img[z] = io.imread(path)

    io.imsave(args.output, img, compress=1)

    verbose_print(args, f'Stacking done!')
Example No. 19
def setup_main(args):
    verbose_print(args, f'Setting up analysis folder')

    # Load the CSV as a dataframe
    df = pd.read_csv(args.input, index_col=0)

    # Create folders for each group
    groups = list(set(df['type']))
    groups.sort()
    for group in groups:
        verbose_print(args, f'Making directory for {group} group')
        os.makedirs(os.path.join(args.output, group), exist_ok=True)

    # Create folders for each dataset with symlinks to underlying data
    for path in df.index:
        group = df['type'].loc[path]
        new_dir = os.path.join(args.output, group, path)
        verbose_print(args, f'Making directory and symlink for {path}')
        os.makedirs(new_dir, exist_ok=True)
        os.symlink(os.path.join(os.path.abspath(args.datasets), path),
                   os.path.join(os.path.abspath(new_dir), 'dataset'))

    verbose_print(args, f'Done setting up analysis folder!')
Example No. 20
def cluster_main(args):
    # This is OLD... See the "determine cyto clusters" notebook

    verbose_print(args, f'Clustering profiles from {args.input}')

    # Load profiles
    profiles = np.load(args.input)

    # Convert to features
    features = profiles_to_features(profiles)

    # Cluster
    kmeans = KMeans(n_clusters=args.n, random_state=0, n_init=10).fit(features)
    labels = kmeans.labels_

    # x_tsne = TSNE(n_components=2, n_jobs=-1, perplexity=500).fit_transform(features)
    x_tsne = UMAP().fit_transform(features)

    if args.plot:
        for i in range(args.n):
            idx = np.where(labels == i)[0]
            plt.plot(x_tsne[idx, 0],
                     x_tsne[idx, 1],
                     '.',
                     alpha=1.0,
                     markersize=3)
        plt.show()

    # Save the labels
    np.save(args.labels, labels)
    np.save(args.tsne, x_tsne)
    verbose_print(args, f'Labels saved to {args.labels}')
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')

    # TODO: Save trained clustering model for classifying new samples (either KMeans or GaussianMixture)

    verbose_print(args, 'Calculating profiles done!')
Example No. 21
def convert_main(args):
    nb_workers = _check_workers(args)

    verbose_print(args, f"Converting {args.input} to Zarr")

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")
    paths_chunked = [paths[pos:pos + args.c[0]] for pos in range(0, len(paths), args.c[0])]

    img = io.imread(paths[0])
    shape = (len(paths), *img.shape)
    dtype = img.dtype
    chunks = tuple(args.c)
    arr = io.new_zarr(args.output, shape=shape, dtype=dtype, chunks=chunks)

    verbose_print(args, f"Writing to {args.output}")
    args_list = []
    for i, paths_batch in enumerate(paths_chunked):
        args_list.append((paths_batch, i, chunks[0], arr))
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(_convert_batch, args_list), total=len(args_list)))

    verbose_print(args, f"Conversion done!")
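
_convert_batch is the batch worker; its definition is not shown. A hypothetical sketch, assuming each batch fills one z-chunk of the output Zarr array via the project's io module:

def _convert_batch(arg):
    # Hypothetical worker: read one chunk-sized run of TIFFs and write it
    # into the corresponding z-slab of the output Zarr array.
    paths_batch, batch_idx, z_chunk, arr = arg
    start = batch_idx * z_chunk
    stack = np.asarray([io.imread(p) for p in paths_batch])
    arr[start:start + len(paths_batch)] = stack
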
Example No. 22
def contrast_main(args):
    # Initial setup
    nb_workers = _check_workers(args)

    if args.k is None:
        verbose_print(args, f"Performing histogram equalization with default kernel size")
        kernel_size = None
    else:
        verbose_print(args, f"Performing histogram equalization with kernel size {args.k}")
        kernel_size = args.k

    # Find all TIFFs
    paths, filenames = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Make output folder
    os.makedirs(args.output, exist_ok=True)

    for path, filename in tqdm.tqdm(zip(paths, filenames), total=len(paths)):
        img = io.imread(path)
        adjusted = equalize_adapthist(img, kernel_size=kernel_size).astype(np.float32)
        io.imsave(os.path.join(args.output, filename), adjusted, compress=args.c)

    verbose_print(args, f"Contrast done!")
Example No. 23
def denoise_main(args):
    # Initial setup
    nb_workers = _check_workers(args)
    os.makedirs(args.output, exist_ok=True)

    # Find all TIFFs
    paths, _ = tifs_in_dir(args.input)
    verbose_print(args, f"Found {len(paths)} TIFFs")

    # Curry denoising function for pmap
    f = partial(denoise2d, sigma=args.s, wavelet=args.w)
    g = partial(read_process_write, f=f, output=args.output, compress=args.c)

    # Parallel read, denoise, write
    verbose_print(args, f"Denoising with {nb_workers} workers:")
    with multiprocessing.Pool(nb_workers) as pool:
        list(tqdm.tqdm(pool.imap(g, paths), total=len(paths)))

    verbose_print(args, f"Denoising done!")
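
read_process_write is not defined in these examples. A plausible sketch, given how it is curried above with f, output, and compress, and assuming the project's io module:

def read_process_write(path, f, output, compress=1):
    # Read a TIFF, apply the processing function, and save the result under
    # the same filename in the output folder.
    img = io.imread(path)
    io.imsave(os.path.join(output, os.path.basename(path)), f(img),
              compress=compress)
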
Example No. 24
def downsample_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    verbose_print(args,
                  f'Downsampling {args.input} with factors {args.factor}')

    if args.tiff:
        os.makedirs(args.output, exist_ok=True)
        paths, filenames = utils.tifs_in_dir(args.input)

        args_list = []
        for path, filename in zip(paths, filenames):
            args_list.append((path, args.factor, args.output, filename))
        with multiprocessing.Pool(nb_workers) as pool:
            pool.starmap(read_downsample_write, args_list)

        # for i, (path, filename) in enumerate(zip(paths, filenames)):
        #     verbose_print(args, f'Downsampling {filename}')
        #     arr = io.imread(path)
        #     if isinstance(args.factor, int):
        #         factors = tuple(args.factor for _ in range(arr.ndim))
        #     else:
        #         factors = tuple(args.factor)
        #     data = downsample(arr, factors)
        #     output = os.path.join(args.output, filename)
        #     io.imsave(output, data, compress=3)

    else:
        arr = io.open(args.input, mode='r')
        if isinstance(args.factor, int):
            factors = tuple(args.factor for _ in range(arr.ndim))
        else:
            factors = tuple(args.factor)
        data = downsample(arr, factors)
        verbose_print(args, f'Writing result to {args.output}')
        io.imsave(args.output, data, compress=3)

    verbose_print(args, f'Downsampling done!')
Example No. 25
def mesh_main(args):
    if args.g is not None:
        if len(args.g) == 1:
            sigma = args.g[0]
        else:
            sigma = tuple(args.g)

    if args.d is None:
        downsample_factor = 1
    else:
        downsample_factor = np.asarray(args.d)

    verbose_print(args, f'Meshing segmentation at {args.input}')

    # Calculate the downsampled voxel size
    voxel_orig = read_voxel_size(args.voxel_size)
    voxel_down = tuple(voxel_orig * downsample_factor)
    verbose_print(args, f'Original voxel size (um): {voxel_orig}')
    verbose_print(args, f'Downsampled voxel size (um): {voxel_down}')

    # Load segmentation
    seg = io.imread(args.input)

    # Smooth segmentation
    if args.g is not None:
        seg = smooth_segmentation(seg, sigma)
        verbose_print(args, f'Smoothed segmentation with sigma {sigma}')

    # Calculate mesh surface
    verts, faces, normals, values = marching_cubes(seg, args.l, voxel_down,
                                                   args.s)
    mesh = {
        'verts': verts,
        'faces': faces,
        'normals': normals,
        'values': values
    }
    verbose_print(args, f'Computed mesh with {len(normals)} normals')

    # Plot mesh
    if args.plot:
        plot_mesh(mesh['verts'], mesh['faces'])
        mlab.show()

    # Save mesh
    save_mesh(args.output, mesh)
    verbose_print(args, f'Mesh saved to {args.output}')

    verbose_print(args, 'Meshing done!')
Example No. 26
def ventricle_main(args):
    verbose_print(args, f'Segmenting ventricles in {args.input}')

    # Load the input image
    data = io.imread(args.input)

    # Load the model
    if args.model.endswith('.pt'):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # device = torch.device("cpu")
        model = load_model(args.model, device)
        model = model.eval()
        verbose_print(
            args,
            f'Pytorch model successfully loaded from {args.model} to {device} device'
        )
        # Segment the input image
        verbose_print(args, f'Segmentation progress:')
        output = segment_ventricles(model, data, args.t, device)
    elif args.model.endswith('.h5'):
        model = load_keras_model(args.model)
        verbose_print(args,
                      f'Keras model successfully loaded from {args.model}')
        # Segment the input image
        verbose_print(args, f'Segmentation progress:')
        output = segment_ventricles_keras(model, data, args.t)
    else:
        raise ValueError('Model must be a Pytorch (.pt) or Keras (.h5) file')

    # Remove border regions
    if args.exclude_border:
        verbose_print(args, f'Removing regions connected to image border')
        # This could also be done in 3D instead of slice-by-slice
        # I'm not sure if images will start in ventricle, so doing slice-by-slice to be safe
        img = np.zeros_like(output)
        for i, data in tqdm(enumerate(output), total=len(output)):
            img[i] = clear_border(data)
        output = img

    # Save the result to TIFF
    io.imsave(args.output, output, compress=3)
    verbose_print(args, f'Segmentation written to {args.output}')

    verbose_print(args, f'Ventricle segmentation done!')
Example No. 27
def fluorescence_main(args):
    if isinstance(args.inputs, list):
        inputs = args.inputs
    else:
        inputs = [args.inputs]

    nb_images = len(inputs)
    verbose_print(args, f'Passed {nb_images} images to measure fluorescence')

    # Load centroids
    centroids = np.load(args.centroids)

    # Initialize output arrays
    mfis = np.zeros((centroids.shape[0], nb_images))
    stdevs = np.zeros((centroids.shape[0], nb_images))
    for i, path in enumerate(inputs):
        # Open image
        arr = io.open(path, mode='r')
        shape, dtype, chunks = arr.shape, arr.dtype, arr.chunks
        verbose_print(args, f'Sampling from {path}: {shape} {dtype}')

        # Sample image
        if args.g is not None:
            # Perform smoothing in a temporary array
            verbose_print(args, f'Smoothing {path} with sigma {tuple(args.g)}')
            with tempfile.TemporaryDirectory(
                    prefix=os.path.abspath('.')) as temp_path:
                smoothed_arr = io.new_zarr(temp_path, shape, chunks, dtype)
                gaussian_blur_parallel(
                    arr, args.g, smoothed_arr, arr.chunks, args.o,
                    args.w)  # Too many workers gives Zarr race condition
                verbose_print(args,
                              f'Sampling fluorescence from smoothed {path}')
                intensities = nuclei_centered_intensities(smoothed_arr,
                                                          centroids,
                                                          args.r,
                                                          mode=args.m,
                                                          nb_workers=args.w)
            # Temporary array deleted when context ends
        else:
            intensities = nuclei_centered_intensities(arr,
                                                      centroids,
                                                      args.r,
                                                      mode=args.m,
                                                      nb_workers=args.w)

        # Compute statistics
        mfis[:, i] = calculate_mfi(intensities)
        stdevs[:, i] = calculate_stdev(intensities)

    # Make output folder
    os.makedirs(args.output, exist_ok=True)

    # Save numpy array of MFIs and stdevs
    mfi_path = os.path.join(args.output, 'nuclei_mfis.npy')
    np.save(mfi_path, mfis)
    verbose_print(args, f'MFIs written to {mfi_path}')

    stdev_path = os.path.join(args.output, 'nuclei_stdevs.npy')
    np.save(stdev_path, stdevs)
    verbose_print(args, f'StDevs written to {stdev_path}')

    # Save CSV containing morphologies for each detected centroid
    # sox2.zarr/ <-- forward slash makes os.path.basename eval to empty string
    # Can use os.path.dirname(path) to get sox2.zarr, then use basename on that
    basenames = [
        os.path.basename(os.path.dirname(path)).split('.')[0]
        for path in inputs
    ]
    csv_names = ['fluorescence_' + str(base) + '.csv' for base in basenames]
    csv_paths = [os.path.join(args.output, name) for name in csv_names]
    for i, (base, path) in enumerate(zip(basenames, csv_paths)):
        df = pd.DataFrame({'mfi': mfis[:, i], 'stdev': stdevs[:, i]})
        df.to_csv(path)
        verbose_print(args,
                      f'Fluorescence statistics for {base} written to {path}')

    verbose_print(args, f'Fluorescence measurements done!')
Example No. 28
def morphology_main(args):
    if args.n is None:
        nb_workers = multiprocessing.cpu_count()
    else:
        nb_workers = args.n

    if args.segmentations is not None:
        return_seg = True
    else:
        return_seg = False

    verbose_print(args, f'Computing morphological features for {args.input}')

    # Get window size
    window_size = np.asarray(args.w)
    verbose_print(args, f'Using window size of {window_size} around each cell')

    # Load the detected centroids and open binary segmentation
    centroids = np.load(
        args.centroids)  # TODO: Make this consider voxel dimensions
    binary_seg = io.open(args.input, mode='r')

    # Compute labeled segmentation and morphologies for each cell
    if return_seg:
        verbose_print(
            args,
            f'Computing segmentations and morphologies with {nb_workers} workers'
        )
    else:
        verbose_print(args,
                      f'Computing morphologies with {nb_workers} workers')
    args_list = [(centroid, window_size, binary_seg, return_seg)
                 for centroid in centroids]
    with multiprocessing.Pool(nb_workers) as pool:
        results = list(
            tqdm(pool.imap(_segment_centroid, args_list),
                 total=len(args_list)))
    # Unpack morphological features
    # features = np.array([com_z, com_y, com_x, volume, eq_diam, minor_length, major_length, axis_ratio])
    features = np.asarray([r[0] for r in results])  # N x feats
    centers_z = features[:, 0]
    centers_y = features[:, 1]
    centers_x = features[:, 2]
    volumes = features[:, 3]
    eq_diams = features[:, 4]
    minor_lengths = features[:, 5]
    major_lengths = features[:, 6]
    axis_ratios = features[:, 7]

    # Save each segmentation
    if return_seg:
        verbose_print(
            args, f'Saving single-cell segmentations to {args.segmentations}')
        singles = np.asarray([r[1] for r in results])
        np.savez_compressed(args.segmentations, singles)

    # Save CSV containing morphologies for each detected centroid
    data = {
        'com_z': centers_z,
        'com_y': centers_y,
        'com_x': centers_x,
        'volume': volumes,
        'eq_diam': eq_diams,
        'minor_length': minor_lengths,
        'major_length': major_lengths,
        'axis_ratio': axis_ratios
    }
    df = pd.DataFrame(data)
    df.to_csv(args.output)
    verbose_print(args, f'Morphological features written to {args.output}')

    verbose_print(args, f'Computing morphologies done!')
Example No. 29
def classify_main(args):
    verbose_print(
        args,
        f'Training KNN model based on {args.profiles_train} and {args.labels_train}'
    )

    # Load training data
    profiles_train = np.load(args.profiles_train)
    x_train = profiles_to_features(
        profiles_train,
        normalize=False)  # Feature normalization is disabled here
    if args.umap is not None:
        model = joblib.load(args.umap)
        x_train = model.transform(x_train)
    y_train = np.load(args.labels_train)
    classes = np.unique(y_train)

    if args.load is None:
        verbose_print(args, f'Training new model')
        # Train model
        # Logistic regression model
        # clf = LogisticRegression(random_state=0,
        #                          solver='lbfgs',
        #                          multi_class='multinomial',
        #                          max_iter=200,
        #                          n_jobs=-1).fit(x_train, y_train)
        # KNN classifier
        clf = KNeighborsClassifier(n_neighbors=1)
        clf.fit(x_train, y_train)
        verbose_print(args,
                      f'Training accuracy: {clf.score(x_train, y_train):.4f}')
    else:
        verbose_print(args, f'Loading model from {args.load}')
        clf = joblib.load(args.load)

    if args.save is not None:
        verbose_print(args, f'Saving model to {args.save}')
        joblib.dump(clf, args.save)

    # Apply classifier
    profiles = np.load(args.profiles)
    x = profiles_to_features(profiles, normalize=False)
    if args.umap is not None:
        x = model.transform(x)
    labels = clf.predict(x)

    nb_cells = len(profiles)
    verbose_print(
        args,
        f'Classified {nb_cells} profiles into {len(classes)} cytoarchitecture classes'
    )
    for c in classes:
        count = len(np.where(labels == c)[0])
        verbose_print(
            args,
            f'Class {c}: {count:10d} profiles {100 * count / nb_cells:10.3f}%')

    # Save the niche labels
    np.save(args.labels, labels)
    verbose_print(args, f'Labels saved to {args.labels}')

    verbose_print(args, f'Classifying done!')
Example No. 30
def preprocess_image3d(args, img):
    # Background removal
    if args.t is not None:
        verbose_print(args, f"Performing background removal with threshold {args.t}")
        img = remove_background(img, args.t)

    # Histogram equalization
    if args.k is not None:
        if args.k == 0:
            verbose_print(args, f"Performing histogram equalization with default kernel size")
            kernel_size = None
        else:
            verbose_print(args, f"Performing histogram equalization with kernel size {args.k}")
            kernel_size = args.k
        img = clahe(img, kernel_size=kernel_size)

    # Normalize and convert to float
    if args.float:
        img = rescale_intensity(img_as_float32(img))
        verbose_print(args, f"Converted to normalized float32: min {img.min():.3f}, max {img.max():.3f}")

    # Denoising
    if args.s is not None:
        verbose_print(args, f"Performing noise removal with sigma {args.s} and wavelet {args.w}")
        img = denoise(img, args.s, args.w)

    # Convert to Zarr
    verbose_print(args, f"Saving result to {args.zarr}")
    arr = io.new_zarr(args.zarr, shape=img.shape, dtype=img.dtype, chunks=tuple(args.c))
    arr[:] = img

    return img