def iss_pipeline(fov, codebook):
    """Run the ISS processing steps on one field of view.

    The steps run in this order: learn and apply a per-round translation
    against the ``'dots'`` image, white top-hat filter the registered
    stack, find spots with a blob detector on a max projection of the
    ``'dots'`` image, decode them with ``PerRoundMaxChannel``, segment
    with ``Segment.Watershed``, and label the decoded spots with the
    segmentation result.

    Parameters
    ----------
    fov
        Object exposing ``get_image``; queried for the primary images and
        for the ``'dots'`` image.
    codebook
        Codebook handed to ``DecodeSpots.PerRoundMaxChannel``.

    Returns
    -------
    tuple
        ``(assigned, label_image)``: the output of ``AssignTargets.Label``
        and the output of the watershed segmentation.
    """
    raw_stack = fov.get_image(starfish.FieldOfView.PRIMARY_IMAGES)

    # register the raw image
    translation_learner = LearnTransform.Translation(
        reference_stack=fov.get_image('dots'),
        axes=Axes.ROUND,
        upsampling=100,
    )
    projected_raw = raw_stack.reduce({Axes.CH, Axes.ZPLANE}, func="max")
    shifts = translation_learner.run(projected_raw)
    warper = ApplyTransform.Warp()
    aligned = warper.run(
        raw_stack,
        transforms_list=shifts,
        in_place=False,
        verbose=True,
    )

    # filter raw data
    tophat_radius = 15
    tophat = Filter.WhiteTophat(tophat_radius, is_volume=False)
    background_removed = tophat.run(aligned, verbose=True, in_place=False)

    # detect spots using laplacian of gaussians approach
    blob_finder = FindSpots.BlobDetector(
        min_sigma=1,
        max_sigma=10,
        num_sigma=30,
        threshold=0.01,
        measurement_type='mean',
    )
    dots_projection = fov.get_image('dots').reduce(
        (Axes.ROUND, Axes.ZPLANE),
        func="max",
        module=FunctionSource.np,
    )

    # locate spots in a reference image
    found_spots = blob_finder.run(
        reference_image=dots_projection,
        image_stack=background_removed,
    )

    # decode the pixel traces using the codebook
    trace_decoder = DecodeSpots.PerRoundMaxChannel(codebook=codebook)
    decoded_spots = trace_decoder.run(spots=found_spots)

    # segment cells
    watershed = Segment.Watershed(
        nuclei_threshold=.16,
        input_threshold=.22,
        min_distance=57,
    )
    label_image = watershed.run(raw_stack, fov.get_image('dots'))

    # assign spots to cells
    assigner = AssignTargets.Label()
    assigned = assigner.run(label_image, decoded_spots)

    return assigned, label_image
def iss_pipeline(fov, codebook):
    """Run the ISS processing steps on one field of view.

    The steps run in this order: learn and apply a per-round translation
    against the ``'dots'`` image, white top-hat filter the registered
    stack, detect spots with ``DetectSpots.BlobDetector`` using the
    ``'dots'`` image as the blobs image, decode the intensities with
    ``codebook.decode_per_round_max``, segment with ``Segment.Watershed``,
    and label the decoded spots with the segmentation result.

    Parameters
    ----------
    fov
        Object exposing ``get_image``; queried for the primary images and
        for the ``'dots'`` image.
    codebook
        Object whose ``decode_per_round_max`` method decodes the detected
        intensities.

    Returns
    -------
    tuple
        ``(assigned, label_image)``: the output of ``AssignTargets.Label``
        and the output of the watershed segmentation.
    """
    raw_stack = fov.get_image(starfish.FieldOfView.PRIMARY_IMAGES)

    # register the raw image
    translation_learner = LearnTransform.Translation(
        reference_stack=fov.get_image('dots'),
        axes=Axes.ROUND,
        upsampling=100,
    )
    max_projector = Filter.Reduce(
        (Axes.CH, Axes.ZPLANE),
        func="max",
        module=Filter.Reduce.FunctionSource.np,
    )
    projected_raw = max_projector.run(raw_stack)
    shifts = translation_learner.run(projected_raw)
    warper = ApplyTransform.Warp()
    aligned = warper.run(
        raw_stack,
        transforms_list=shifts,
        in_place=False,
        verbose=True,
    )

    # filter raw data
    tophat_radius = 15
    tophat = Filter.WhiteTophat(tophat_radius, is_volume=False)
    background_removed = tophat.run(aligned, verbose=True, in_place=False)

    # detect spots using laplacian of gaussians approach
    blob_detector = DetectSpots.BlobDetector(
        min_sigma=1,
        max_sigma=10,
        num_sigma=30,
        threshold=0.01,
        measurement_type='mean',
    )
    intensities = blob_detector.run(
        background_removed,
        blobs_image=fov.get_image('dots'),
        blobs_axes=(Axes.ROUND, Axes.ZPLANE),
    )

    # decode the pixel traces using the codebook
    decoded_spots = codebook.decode_per_round_max(intensities)

    # segment cells
    watershed = Segment.Watershed(
        nuclei_threshold=.16,
        input_threshold=.22,
        min_distance=57,
    )
    label_image = watershed.run(raw_stack, fov.get_image('dots'))

    # assign spots to cells
    assigner = AssignTargets.Label()
    assigned = assigner.run(label_image, decoded_spots)

    return assigned, label_image
distance_threshold=0.5176, magnitude_threshold=1.77e-5, min_area=2, max_area=np.inf, ) initial_spot_intensities, prop_results = psd.run(filtered_imgs) # Select only decoded spots that pass thresholds and map to genes in codebook decoded = initial_spot_intensities.loc[initial_spot_intensities[Features.PASSES_THRESHOLDS]] decoded_filtered = decoded[decoded.target != 'nan'] # Load cell mask roi_path = os.path.join(os.path.dirname("__file__"), 'RoiSet.zip') masks = BinaryMaskCollection.from_fiji_roi_set(path_to_roi_set_zip=roi_path, original_image=dapi) # Assign spots to cells by labeling each spot with cell_id al = AssignTargets.Label() labeled = al.run(masks, decoded_filtered) # Filter out spots that are not located in any cell mask labeled_filtered = labeled[labeled.cell_id != 'nan'] ################################################################################################### # Now that every :term:`feature <Feature>` in the :py:class:`.DecodedIntensityTable` is labeled # with a valid ``cell_id``, the features can be grouped by cell into a single cell gene expression # matrix. In this matrix, each row is a cell and each column is a gene. The values within the # matrix are the number of features of that particular gene in that particular cell. # Transform to expression matrix and show first 12 genes mat = labeled_filtered.to_expression_matrix() mat.to_pandas().iloc[:, 0:12].astype(int)
def make_expression_matrix(masks, decoded):
    """Label decoded features with cell ids and convert them to an expression matrix.

    Features whose ``target`` equals the string ``'nan'`` are dropped
    before labeling, and labeled features whose ``cell_id`` equals
    ``'nan'`` are dropped before the conversion.

    Parameters
    ----------
    masks
        Masks passed as the first argument to ``AssignTargets.Label().run``.
    decoded
        Decoded feature table; must expose ``target`` and support boolean
        indexing.

    Returns
    -------
    The result of calling ``to_expression_matrix()`` on the remaining
    labeled features.
    """
    assigner = AssignTargets.Label()

    # keep only features that decoded to a target
    with_target = decoded[decoded.target != 'nan']
    labeled = assigner.run(masks, with_target)

    # keep only features that were given a cell id
    inside_cells = labeled[labeled.cell_id != 'nan']
    return inside_cells.to_expression_matrix()
def test_iss_pipeline_cropped_data(tmpdir):
    """Regression-check the values exposed by the ``ISS`` module.

    Importing ``ISS`` makes its results available as module attributes.
    This test pins a 10x10 window of the filtered and registered images,
    the recorded processing log, the decoded gene counts, the number of
    watershed markers, and the outcome of assigning decoded spots to
    masks (including a netCDF round trip of the log).

    Parameters
    ----------
    tmpdir
        Directory in which the temporary netCDF file is created.
    """
    # seed numpy before the import below (original note: guards against
    # errors provoked by optimization functions)
    np.random.seed(777)

    iss = __import__('ISS')

    # the same window of every image is compared against stored values;
    # indexing with this tuple is identical to [2, 2, 0, 40:50, 40:50]
    window = (2, 2, 0, slice(40, 50), slice(40, 50))

    filtered_stack = iss.filtered_imgs

    expected_filtered_values = np.array(
        [[0.1041123, 0.09968718, 0.09358358, 0.09781034, 0.08943313,
          0.08853284, 0.08714428, 0.07518119, 0.07139697, 0.0585336],
         [0.09318685, 0.09127947, 0.0890364, 0.094728, 0.08799877,
          0.08693064, 0.08230717, 0.06738383, 0.05857938, 0.04223698],
         [0.08331426, 0.0812543, 0.08534371, 0.0894789, 0.09184404,
          0.08274967, 0.0732433, 0.05564965, 0.04577706, 0.03411917],
         [0.06741435, 0.07370108, 0.06511024, 0.07193103, 0.07333485,
          0.07672236, 0.06019684, 0.04415961, 0.03649958, 0.02737468],
         [0.05780118, 0.06402685, 0.05947966, 0.05598535, 0.05192646,
          0.04870679, 0.04164187, 0.03291371, 0.03030441, 0.02694743],
         [0.04649424, 0.06117342, 0.05899138, 0.05101091, 0.03639277,
          0.03379873, 0.03382925, 0.0282597, 0.02383459, 0.01651026],
         [0.0414435, 0.04603647, 0.05458152, 0.04969863, 0.03799496,
          0.0325475, 0.02928206, 0.02685588, 0.02172885, 0.01722743],
         [0.04107728, 0.04161135, 0.04798963, 0.05156023, 0.03952087,
          0.02899214, 0.02589456, 0.02824444, 0.01815823, 0.01557945],
         [0.03901731, 0.03302052, 0.03498893, 0.03929199, 0.03695735,
          0.02943466, 0.01945525, 0.01869231, 0.01666284, 0.01240558],
         [0.02664226, 0.02386511, 0.02206454, 0.02978561, 0.03265431,
          0.0265507, 0.02214084, 0.01844815, 0.01542687, 0.01353475]],
        dtype=np.float32
    )
    assert filtered_stack.xarray.dtype == np.float32
    assert np.allclose(expected_filtered_values, filtered_stack.xarray[window])

    registered_stack = iss.registered_imgs

    expected_registered_values = np.array(
        [[9.972601e-03, 4.410370e-03, 3.392192e-03, 1.687834e-03, 1.880155e-04,
          0.000000e+00, 1.047019e-04, 1.578360e-05, 1.069453e-03, 6.543968e-03],
         [1.456979e-02, 9.646147e-03, 8.203185e-03, 5.936079e-03, 1.839891e-03,
          3.569032e-04, 5.237808e-04, 3.792955e-04, 4.592746e-05, 1.088151e-03],
         [2.313178e-02, 1.586836e-02, 1.240375e-02, 9.513815e-03, 3.563545e-03,
          1.488329e-03, 1.326624e-03, 2.939297e-04, 5.607218e-04, 3.690171e-03],
         [3.531289e-02, 2.446796e-02, 1.964004e-02, 1.258251e-02, 7.771713e-03,
          4.918387e-03, 2.766922e-03, 3.267574e-04, 4.892451e-04, 5.261183e-03],
         [5.146676e-02, 3.794888e-02, 3.141785e-02, 2.312119e-02, 1.555709e-02,
          9.402979e-03, 6.135746e-03, 7.547007e-04, 1.231891e-03, 2.656648e-03],
         [5.952225e-02, 5.170041e-02, 4.459279e-02, 3.416265e-02, 2.403326e-02,
          1.659481e-02, 1.189285e-02, 4.377660e-03, 1.810592e-03, 1.729033e-03],
         [5.872828e-02, 5.881007e-02, 5.405803e-02, 4.143796e-02, 3.181438e-02,
          2.468321e-02, 1.451422e-02, 6.834699e-03, 6.021897e-03, 2.588449e-03],
         [4.815195e-02, 5.578594e-02, 5.535153e-02, 4.701486e-02, 3.499170e-02,
          2.584777e-02, 1.871042e-02, 1.036013e-02, 8.698075e-03, 2.945077e-03],
         [4.108098e-02, 4.543370e-02, 4.911040e-02, 4.965232e-02, 4.022935e-02,
          2.973786e-02, 1.956365e-02, 1.386791e-02, 8.811617e-03, 6.941982e-03],
         [3.560406e-02, 3.779930e-02, 4.068928e-02, 4.668610e-02, 4.536487e-02,
          3.364870e-02, 2.244582e-02, 1.683235e-02, 1.113740e-02, 1.012298e-02]],
        dtype=np.float32
    )
    assert np.allclose(expected_registered_values, registered_stack.xarray[window])

    # the registered image's log lists the filter first, then the warp
    image_log = registered_stack.log.data
    for position, method in enumerate(('WhiteTophat', 'Warp')):
        assert image_log[position]['method'] == method

    # decode
    decoded = iss.decoded

    # decoding yields these twelve gene names with these per-gene counts
    expected_genes = np.array(['ACTB', 'CD68', 'CTSL2', 'EPCAM', 'ETV4', 'GAPDH',
                               'GUS', 'HER2', 'RAC1', 'TFRC', 'TP53', 'VEGF'])
    expected_counts = [19, 1, 5, 2, 1, 11, 1, 3, 2, 1, 1, 2]
    assert np.array_equal(iss.genes, expected_genes)
    assert np.array_equal(iss.counts, expected_counts)
    assert decoded.sizes[Features.AXIS] == 99

    masks = iss.masks

    # the watershed markers exposed by the module have length 6
    assert len(iss.watershed_markers) == 6

    # assign targets
    assigner = AssignTargets.Label()
    assigned = assigner.run(masks, decoded)
    assigned_log = assigned.get_log()

    # assert that physical coordinates were transferred
    for coordinate in (Coordinates.X, Coordinates.Y, Coordinates.Z):
        assert coordinate in assigned.coords

    full_pipeline = ('WhiteTophat', 'Warp', 'BlobDetector', 'PerRoundMaxChannel')
    for position, method in enumerate(full_pipeline):
        assert assigned_log[position]['method'] == method

    # Test serialization / deserialization of IntensityTable log.
    # The temporary file is only used to reserve a path (delete=False keeps
    # it on disk); the table is written once the handle has been closed.
    with tempfile.NamedTemporaryFile(dir=tmpdir, delete=False) as handle:
        netcdf_path = handle.name
    assigned.to_netcdf(netcdf_path)

    reloaded = IntensityTable.open_netcdf(netcdf_path)
    reloaded_log = reloaded.get_log()
    for position, method in enumerate(full_pipeline):
        assert reloaded_log[position]['method'] == method

    # 28 of the spots carry cell_id '1' (although most spots do not decode!)
    assert np.sum(assigned['cell_id'] == '1') == 28
def _segment_fovs(
    input_loc,
    exp_loc,
    output_dir,
    fov_count,
    aux_name,
    roiKwargs,
    labeledKwargs,
    watershedKwargs,
):
    """Load decoded tables, build masks, and write the per-FOV outputs under ``output_dir``."""
    # read in netcdfs based on how we saved prev step
    suffix = "_decoded.cdf"
    results = []
    keys = []
    # glob() gives no ordering guarantee, so sort to make the index-based
    # pairing below reproducible.
    # NOTE(review): masks are built in exp.keys() order; this pairing
    # presumes exp.keys() yields the same name-sorted order -- confirm.
    for f in sorted(glob("{}/cdf/*_decoded.cdf".format(input_loc))):
        results.append(DecodedIntensityTable.open_netcdf(f))
        # FOV key = file name without the "_decoded.cdf" suffix
        name = path.basename(f)[:-len(suffix)]
        print("found fov key: " + name)
        keys.append(name)
        print("loaded " + f)

        fov_dir = path.join(output_dir, name)
        if not path.isdir(fov_dir):
            makedirs(fov_dir)
            print("made " + fov_dir)

    # load in the images we want to look at
    exp_json = str(exp_loc / "experiment.json")
    exp = starfish.core.experiment.experiment.Experiment.from_json(exp_json)
    print("loaded " + exp_json)
    img_stack = []
    for key in exp.keys():
        print("looking at " + key + ", " + aux_name)
        img_stack.append(exp[key].get_image(aux_name))

    # determine how we generate mask, then make it; the first non-empty
    # kwargs dict wins, in the order roi -> labeled image -> watershed
    if roiKwargs:
        print("applying Roi mask")
        masks = masksFromRoi(img_stack, **roiKwargs)
    elif labeledKwargs:
        print("applying labeled image mask")
        masks = masksFromLabeledImages(img_stack, **labeledKwargs)
    elif watershedKwargs:
        print("running basic threshold and watershed pipeline")
        masks = masksFromWatershed(img_stack, **watershedKwargs)
    else:
        raise ValueError("Parameters do not specify means of defining mask.")

    # save masks to tiffs for later processing
    for i, mask in enumerate(masks):
        binmask = mask.to_label_image().xarray.values
        # collapse leading axes until a 2D image remains, then binarize
        while len(binmask.shape) > 2:
            binmask = np.sum(binmask, axis=0)
        binmask = binmask > 0
        PIL.Image.fromarray(binmask).save(
            path.join(output_dir, keys[i], "mask.tiff"))

    # apply mask to tables, save results
    al = AssignTargets.Label()
    for i in range(fov_count):
        fov_dir = path.join(output_dir, keys[i])
        labeled = al.run(masks[i], results[i])
        labeled.to_decoded_dataframe().save_csv(
            path.join(fov_dir, "segmentation.csv"))
        labeled.to_netcdf(path.join(fov_dir, "df_segmented.cdf"))

        # build the expression matrix once and reuse it for all three formats
        expression = labeled.to_expression_matrix()
        expression.to_pandas().to_csv(path.join(fov_dir, "exp_segmented.csv"))
        expression.save(path.join(fov_dir, "exp_segmented.cdf"))
        expression.save_anndata(path.join(fov_dir, "exp_segmented.h5ad"))
        print("saved fov key: {}, index {}".format(keys[i], i))


def run(
    input_loc: Path,
    exp_loc: Path,
    output_loc: str,
    fov_count: int,
    aux_name: str,
    roiKwargs: dict,
    labeledKwargs: dict,
    watershedKwargs: dict,
):
    """
    Main function for generating and applying masks then saving output.

    While the function runs, ``sys.stdout`` and ``sys.stderr`` are
    redirected to a timestamped ``*_starfish_segmenter.log`` file inside
    ``output_loc``. Both streams are restored and the log file is closed
    when the function exits, including when an exception is raised.

    Parameters
    ----------
    input_loc: Path
        Location of input cdf files, as formatted by starfishRunner.cwl
    exp_loc: Path
        Directory that contains "experiment.json" file for the experiment.
    output_loc: str
        Path to directory where output will be saved.
    fov_count: int
        The number of FOVs in the experiment.
    aux_name: str
        The name of the auxiliary view to look at for image segmentation.
    roiKwargs: dict
        Dictionary with arguments for reading in masks from an RoiSet.
        See masksFromRoi.
    labeledKwargs: dict
        Dictionary with arguments for reading in masks from a labeled image.
        See masksFromLabeledImages.
    watershedKwargs: dict
        Dictionary with arguments for running basic watershed pipeline.
        See masksFromWatershed.

    Raises
    ------
    ValueError
        If ``roiKwargs``, ``labeledKwargs`` and ``watershedKwargs`` are all
        empty, so no way of building masks was specified.
    """
    # Honour the documented parameter; the body previously read a name
    # (``output_dir``) that is not defined inside this function.
    output_dir = output_loc

    if not path.isdir(output_dir):
        makedirs(output_dir)

    # disabling tqdm for pipeline runs
    tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

    # redirecting output to log
    log_name = datetime.now().strftime("%Y%m%d_%H%M_starfish_segmenter.log")
    previous_stdout, previous_stderr = sys.stdout, sys.stderr
    with open(path.join(output_dir, log_name), "w") as reporter:
        sys.stdout = reporter
        sys.stderr = reporter
        try:
            _segment_fovs(
                input_loc,
                exp_loc,
                output_dir,
                fov_count,
                aux_name,
                roiKwargs,
                labeledKwargs,
                watershedKwargs,
            )
        finally:
            # put both streams back before the log file is closed
            sys.stdout = previous_stdout
            sys.stderr = previous_stderr