def test_csv_numpy(dtype, delimiter):
    """Round-trip a zero-filled array through a csv file.

    The array is written with ``stack.save_data_to_csv`` and read back twice
    with ``stack.read_array_from_csv``: once without a dtype (the result is
    asserted to be ``np.float64``) and once with `dtype` (values and dtype
    are asserted to match the array that was written).

    Parameters
    ----------
    dtype : type or np.dtype
        Dtype of the array written to disk and requested on the second read.
    delimiter : str
        Column delimiter forwarded to the writer and the reader.

    """
    # everything happens inside a throwaway directory, removed on exit
    with tempfile.TemporaryDirectory() as tmp_dir:
        csv_path = os.path.join(tmp_dir, "test_array.csv")

        # write a (10, 2) array of zeros
        expected = np.zeros((10, 2), dtype=dtype)
        stack.save_data_to_csv(expected, csv_path, delimiter=delimiter)

        # read without dtype
        untyped = stack.read_array_from_csv(csv_path, delimiter=delimiter)
        assert untyped.dtype == np.float64

        # read with the original dtype
        typed = stack.read_array_from_csv(csv_path, dtype, delimiter)
        assert_array_equal(expected, typed)
        assert typed.dtype == dtype
def fct_to_process(i, n):
    """Simulate one image and write the image, ground truth and plot to disk.

    Apart from `i` and `n`, every simulation setting and output directory
    used here is a free variable resolved from a scope that is not part of
    this block.

    Parameters
    ----------
    i : int
        Index formatted into the output file names (``image_{i}.tif``,
        ``gt_{i}.csv`` and ``plot_{i}.png``).
    n : int
        Value passed to the simulator as ``n_spots``.

    """
    # simulate the image and its ground truth
    simulation = spots.simulate_image(
        image_shape=image_shape,
        image_dtype=image_dtype,
        subpixel_factors=subpixel_factors,
        voxel_size_z=voxel_size_z,
        voxel_size_yx=voxel_size_yx,
        n_spots=n,
        random_n_spots=random_n_spots,
        n_clusters=n_clusters,
        random_n_clusters=random_n_clusters,
        n_spots_cluster=n_spots_cluster,
        random_n_spots_cluster=random_n_spots_cluster,
        centered_cluster=False,
        sigma_z=sigma_z,
        sigma_yx=sigma_yx,
        random_sigma=random_sigma,
        amplitude=amplitude,
        random_amplitude=random_amplitude,
        noise_level=noise_level,
        random_noise=random_noise)
    image, ground_truth = simulation

    # write the image
    image_path = os.path.join(path_directory_image, "image_{0}.tif".format(i))
    stack.save_image(image, image_path)

    # write the ground truth
    gt_path = os.path.join(path_directory_gt, "gt_{0}.csv".format(i))
    stack.save_data_to_csv(ground_truth, gt_path)

    # write the plot; only the title uses the ground truth (its length)
    plot_path = os.path.join(path_directory_plot, "plot_{0}.png".format(i))
    plot_title = "Number of mRNAs: {0}".format(len(ground_truth))
    plot.plot_spots(
        image,
        ground_truth=None,
        prediction=None,
        subpixel=subpixel_factors is not None,
        rescale=True,
        contrast=False,
        title=plot_title,
        framesize=(8, 8),
        remove_frame=False,
        path_output=plot_path,
        ext="png",
        show=False)
def fct_to_process(i, n):
    """Simulate one image and write the image, ground truth and plot to disk.

    Apart from `i` and `n`, every simulation setting and output directory
    used here is a free variable resolved from a scope that is not part of
    this block.

    Parameters
    ----------
    i : int
        Index formatted into the output file names (``image_{i}.tif``,
        ``gt_{i}.csv`` and ``plot_{i}.png``).
    n : int
        Value passed to the simulator as ``n_spots_cluster``. It is also
        appended to the ground truth as an extra, constant column.

    """
    # simulate the image and its ground truth
    simulation = sim.simulate_image(
        ndim=ndim,
        n_spots=n_spots,
        random_n_spots=random_n_spots,
        n_clusters=n_clusters,
        random_n_clusters=random_n_clusters,
        n_spots_cluster=n,
        random_n_spots_cluster=random_n_spots_cluster,
        centered_cluster=centered_cluster,
        image_shape=image_shape,
        image_dtype=np.uint16,
        subpixel_factors=subpixel_factors,
        voxel_size=voxel_size,
        sigma=sigma,
        random_sigma=random_sigma,
        amplitude=amplitude,
        random_amplitude=random_amplitude,
        noise_level=noise_level,
        random_noise=random_noise)
    image, ground_truth = simulation

    # write the image
    image_path = os.path.join(path_directory_image, "image_{0}.tif".format(i))
    stack.save_image(image, image_path)

    # append a column filled with n to the ground truth, then write it
    n_column = np.array([n] * len(ground_truth))[:, np.newaxis]
    ground_truth = np.hstack([ground_truth, n_column])
    gt_path = os.path.join(path_directory_gt, "gt_{0}.csv".format(i))
    stack.save_data_to_csv(ground_truth, gt_path)

    # write a plot of the projected image
    plot_path = os.path.join(path_directory_plot, "plot_{0}.png".format(i))
    projection = stack.maximum_projection(image)
    plot_title = "Number of spots: {0}".format(len(ground_truth))
    plot.plot_images(
        images=projection,
        rescale=True,
        titles=[plot_title],
        framesize=(8, 8),
        remove_frame=False,
        path_output=plot_path,
        show=False)
def test_csv_pandas(delimiter):
    """Round-trip a pandas Series and DataFrame through csv files.

    Both objects are written with ``stack.save_data_to_csv`` and read back
    with ``stack.read_dataframe_from_csv``. The series is compared against
    its one-column frame, the dataframe against itself.

    Parameters
    ----------
    delimiter : str
        Column delimiter forwarded to the writer and the reader.

    """
    # everything happens inside a throwaway directory, removed on exit
    with tempfile.TemporaryDirectory() as tmp_dir:
        series_path = os.path.join(tmp_dir, "test_series.csv")
        frame_path = os.path.join(tmp_dir, "test_dataframe.csv")

        # objects to save
        test_series = pd.Series([0.1, 2.5, 3.7], name="a")
        test_dataframe = pd.DataFrame({
            "a": [0, 2, 3],
            "b": [0.1, 2.5, 3.7],
            "c": ["dog", "cat", "bird"]
        })

        # write both csv files
        stack.save_data_to_csv(test_series, series_path, delimiter=delimiter)
        stack.save_data_to_csv(
            test_dataframe, frame_path, delimiter=delimiter)

        # the series comes back as a one-column dataframe
        loaded = stack.read_dataframe_from_csv(
            series_path, delimiter=delimiter)
        pd.testing.assert_frame_equal(test_series.to_frame(), loaded)

        # the dataframe comes back unchanged
        loaded = stack.read_dataframe_from_csv(
            frame_path, delimiter=delimiter)
        pd.testing.assert_frame_equal(test_dataframe, loaded)
def summarize_extraction_results(fov_results, ndim, path_output=None,
                                 delimiter=";"):
    """Summarize results extracted from an image and store them in a
    dataframe.

    Parameters
    ----------
    fov_results : List[Dict]
        List of dictionaries, one per cell segmented in the image. Each
        dictionary includes information about the cell (image, masks,
        coordinates arrays). The minimal information is:

        * `cell_id`: Unique id of the cell.
        * `bbox`: bounding box coordinates with the order (`min_y`, `min_x`,
          `max_y`, `max_x`).
        * `cell_coord`: boundary coordinates of the cell.
        * `cell_mask`: mask of the cell.
    ndim : int
        Number of spatial dimensions to consider (2 or 3).
    path_output : str, optional
        Path to save the dataframe in a csv file.
    delimiter : str, default=";"
        Delimiter used to separate columns if the dataframe is saved in a
        csv file.

    Returns
    -------
    df : pd.DataFrame
        Dataframe with summarized results from the field of view, at the cell
        level, one row per cell. If `fov_results` is empty, only an empty
        `cell_id` column is returned. Otherwise the columns are:

        * `cell_id`: Unique id of the cell.
        * `nb_rna`: Number of detected rna in the cell.
        * `nb_rna_in_nuc`: Number of detected rna inside the nucleus.
        * `nb_rna_out_nuc`: Number of detected rna outside the nucleus.
        * `nb_<key>`: Number of elements for every extra coordinates array
          found in the first cell (a `np.ndarray` with dtype int64 or
          float64 stored under a key without dedicated handling).

        An indicator that cannot be computed for a cell (`rna_coord`,
        `nuc_mask` or an extra key is missing from its dictionary) is set
        to NaN for that cell.

    """
    # check parameters
    stack.check_parameter(
        fov_results=list,
        ndim=int,
        path_output=(str, type(None)))

    # case if no cell were detected
    # TODO make it consistent with the case where there are cells
    if len(fov_results) == 0:
        df = pd.DataFrame({"cell_id": []})
        if path_output is not None:
            stack.save_data_to_csv(df, path_output, delimiter)
        return df

    # keys with a dedicated handling, never counted as extra coordinates
    reserved_keys = {
        "cell_id", "bbox", "cell_coord", "cell_mask", "nuc_coord",
        "nuc_mask", "rna_coord", "image"
    }

    # check extra coordinates to summarize, based on the first cell only
    first_cell = fov_results[0]
    extra_counts = {}
    for key in first_cell:
        if key in reserved_keys:
            continue
        candidate = first_cell[key]
        if not isinstance(candidate, np.ndarray):
            continue
        if candidate.dtype not in [np.int64, np.float64]:
            continue
        extra_counts[key] = []

    # summarize results at the cell level: exactly one value is recorded per
    # cell and per indicator, so every column has the same length even when
    # only some cells provide `rna_coord` or `nuc_mask`
    cell_ids = []
    nb_rna = []
    nb_rna_in_nuc = []
    nb_rna_out_nuc = []
    for cell_results in fov_results:
        cell_ids.append(cell_results["cell_id"])

        # default to NaN, overwritten below when the cell has the data
        cell_nb_rna = np.nan
        cell_nb_in_nuc = np.nan
        cell_nb_out_nuc = np.nan

        # get rna coordinates and relative results
        if "rna_coord" in cell_results:
            rna_coord = cell_results["rna_coord"]
            cell_nb_rna = len(rna_coord)

            # the split in/out of the nucleus needs both rna and nucleus
            if "nuc_mask" in cell_results:
                rna_in_nuc, rna_out_nuc = identify_objects_in_region(
                    cell_results["nuc_mask"], rna_coord, ndim)
                cell_nb_in_nuc = len(rna_in_nuc)
                cell_nb_out_nuc = len(rna_out_nuc)

        nb_rna.append(cell_nb_rna)
        nb_rna_in_nuc.append(cell_nb_in_nuc)
        nb_rna_out_nuc.append(cell_nb_out_nuc)

        # get others coordinates, NaN if this cell does not have the key
        for key, counts in extra_counts.items():
            if key in cell_results:
                counts.append(len(cell_results[key]))
            else:
                counts.append(np.nan)

    # gather every column
    result_summary = {
        "cell_id": cell_ids,
        "nb_rna": nb_rna,
        "nb_rna_in_nuc": nb_rna_in_nuc,
        "nb_rna_out_nuc": nb_rna_out_nuc
    }
    for key, counts in extra_counts.items():
        result_summary["nb_{0}".format(key)] = counts

    # instantiate dataframe
    df = pd.DataFrame(result_summary)

    # save dataframe
    if path_output is not None:
        stack.save_data_to_csv(df, path_output, delimiter)

    return df