Exemplo n.º 1
0
def iss_pipeline(fov, codebook):
    """Run the ISS workflow on a single field of view.

    The steps are: learn and apply a per-round translation, white-tophat
    filter the registered stack, detect blobs against a max-projected 'dots'
    image, decode with the per-round max-channel decoder, run watershed
    segmentation, and label each decoded spot with the mask it falls in.

    Parameters
    ----------
    fov :
        Field of view exposing ``get_image`` for the primary images and for
        the auxiliary image named 'dots'.
    codebook :
        Codebook passed to ``DecodeSpots.PerRoundMaxChannel``.

    Returns
    -------
    tuple
        ``(assigned, label_image)`` -- the result of ``AssignTargets.Label().run``
        and the result of ``Segment.Watershed(...).run``.
    """
    raw_stack = fov.get_image(starfish.FieldOfView.PRIMARY_IMAGES)

    # Registration: learn one translation per round against the 'dots' image,
    # using the primary stack max-projected over channel and z-plane.
    translation_learner = LearnTransform.Translation(
        reference_stack=fov.get_image('dots'),
        axes=Axes.ROUND,
        upsampling=100,
    )
    projected_primary = raw_stack.reduce({Axes.CH, Axes.ZPLANE}, func="max")
    learned_transforms = translation_learner.run(projected_primary)
    aligned_stack = ApplyTransform.Warp().run(
        raw_stack,
        transforms_list=learned_transforms,
        in_place=False,
        verbose=True,
    )

    # Filtering: white tophat applied to the registered copy (not in place).
    tophat_radius = 15
    tophat = Filter.WhiteTophat(tophat_radius, is_volume=False)
    background_removed = tophat.run(aligned_stack, verbose=True, in_place=False)

    # Spot finding: blob detector configuration.
    blob_settings = dict(
        min_sigma=1,
        max_sigma=10,
        num_sigma=30,
        threshold=0.01,
        measurement_type='mean',
    )
    spot_finder = FindSpots.BlobDetector(**blob_settings)

    # Reference image for spot locations: 'dots' max-projected over round and z-plane.
    dots_reference = fov.get_image('dots').reduce(
        (Axes.ROUND, Axes.ZPLANE),
        func="max",
        module=FunctionSource.np,
    )
    found_spots = spot_finder.run(reference_image=dots_reference,
                                  image_stack=background_removed)

    # Decoding: map the measured spot traces to codebook targets.
    decoded = DecodeSpots.PerRoundMaxChannel(codebook=codebook).run(spots=found_spots)

    # Segmentation: watershed on the unregistered primary stack and the 'dots' image.
    watershed = Segment.Watershed(
        nuclei_threshold=.16,
        input_threshold=.22,
        min_distance=57,
    )
    label_image = watershed.run(raw_stack, fov.get_image('dots'))

    # Assignment: label every decoded spot with the cell mask it falls in.
    assigned = AssignTargets.Label().run(label_image, decoded)

    return assigned, label_image
Exemplo n.º 2
0
def iss_pipeline(fov, codebook):
    """Run the ISS workflow on a single field of view.

    This variant max-projects with ``Filter.Reduce``, finds spots with
    ``DetectSpots.BlobDetector`` and decodes via
    ``codebook.decode_per_round_max``.

    Parameters
    ----------
    fov :
        Field of view exposing ``get_image`` for the primary images and for
        the auxiliary image named 'dots'.
    codebook :
        Codebook whose ``decode_per_round_max`` method decodes the measured
        intensities.

    Returns
    -------
    tuple
        ``(assigned, label_image)`` -- the result of ``AssignTargets.Label().run``
        and the result of ``Segment.Watershed(...).run``.
    """
    raw_stack = fov.get_image(starfish.FieldOfView.PRIMARY_IMAGES)

    # Registration: learn one translation per round against the 'dots' image
    # from the primary stack max-projected over channel and z-plane.
    translation_learner = LearnTransform.Translation(
        reference_stack=fov.get_image('dots'),
        axes=Axes.ROUND,
        upsampling=100,
    )
    ch_z_max = Filter.Reduce(
        (Axes.CH, Axes.ZPLANE),
        func="max",
        module=Filter.Reduce.FunctionSource.np,
    )
    projected_primary = ch_z_max.run(raw_stack)
    learned_transforms = translation_learner.run(projected_primary)
    aligned_stack = ApplyTransform.Warp().run(
        raw_stack,
        transforms_list=learned_transforms,
        in_place=False,
        verbose=True,
    )

    # Filtering: white tophat applied to the registered copy (not in place).
    tophat_radius = 15
    tophat = Filter.WhiteTophat(tophat_radius, is_volume=False)
    background_removed = tophat.run(aligned_stack, verbose=True, in_place=False)

    # Spot detection: blobs are located on the 'dots' image (combined over
    # round and z-plane) and measured on the filtered stack.
    blob_settings = dict(
        min_sigma=1,
        max_sigma=10,
        num_sigma=30,
        threshold=0.01,
        measurement_type='mean',
    )
    spot_detector = DetectSpots.BlobDetector(**blob_settings)
    spot_intensities = spot_detector.run(
        background_removed,
        blobs_image=fov.get_image('dots'),
        blobs_axes=(Axes.ROUND, Axes.ZPLANE),
    )

    # Decoding: map the measured traces to codebook targets.
    decoded = codebook.decode_per_round_max(spot_intensities)

    # Segmentation: watershed on the unregistered primary stack and the 'dots' image.
    watershed = Segment.Watershed(
        nuclei_threshold=.16,
        input_threshold=.22,
        min_distance=57,
    )
    label_image = watershed.run(raw_stack, fov.get_image('dots'))

    # Assignment: label every decoded spot with the cell mask it falls in.
    assigned = AssignTargets.Label().run(label_image, decoded)

    return assigned, label_image
Exemplo n.º 3
0
    distance_threshold=0.5176,
    magnitude_threshold=1.77e-5,
    min_area=2,
    max_area=np.inf,
)
# Run the detector configured above on the filtered images; it returns the
# spot intensities together with a second result object (unused below).
initial_spot_intensities, prop_results = psd.run(filtered_imgs)
# Select only decoded spots that pass thresholds and map to genes in codebook
decoded = initial_spot_intensities.loc[initial_spot_intensities[Features.PASSES_THRESHOLDS]]
# Spots without a matching target carry the string 'nan' as their target.
decoded_filtered = decoded[decoded.target != 'nan']

# Load cell mask
# NOTE(review): "__file__" is a string literal here, not the module attribute,
# so os.path.dirname(...) evaluates to '' and roi_path is the relative path
# 'RoiSet.zip' (resolved against the current working directory). Presumably
# intentional for environments where __file__ is undefined -- confirm.
roi_path = os.path.join(os.path.dirname("__file__"), 'RoiSet.zip')
masks = BinaryMaskCollection.from_fiji_roi_set(path_to_roi_set_zip=roi_path, original_image=dapi)

# Assign spots to cells by labeling each spot with cell_id
al = AssignTargets.Label()
labeled = al.run(masks, decoded_filtered)

# Filter out spots that are not located in any cell mask
# (such spots carry the string 'nan' as their cell_id)
labeled_filtered = labeled[labeled.cell_id != 'nan']

###################################################################################################
# Now that every :term:`feature <Feature>` in the :py:class:`.DecodedIntensityTable` is labeled
# with a valid ``cell_id``, the features can be grouped by cell into a single cell gene expression
# matrix. In this matrix, each row is a cell and each column is a gene. The values within the
# matrix are the number of features of that particular gene in that particular cell.

# Transform to expression matrix and show first 12 genes
mat = labeled_filtered.to_expression_matrix()
# The value of this expression is not assigned; presumably it is meant to be
# rendered by a notebook / gallery front end as the last expression of the cell.
mat.to_pandas().iloc[:, 0:12].astype(int)
Exemplo n.º 4
0
def make_expression_matrix(masks, decoded):
    """Build an expression matrix from decoded spots and cell masks.

    Spots whose ``target`` is the string 'nan' are dropped before assignment,
    and spots whose assigned ``cell_id`` is the string 'nan' are dropped before
    the matrix is built.

    Parameters
    ----------
    masks :
        Cell masks passed to ``AssignTargets.Label().run``.
    decoded :
        Decoded spot table with a ``target`` attribute.

    Returns
    -------
    The result of ``to_expression_matrix()`` on the filtered, labeled spots.
    """
    spots_with_target = decoded[decoded.target != 'nan']
    assigned = AssignTargets.Label().run(masks, spots_with_target)
    spots_in_cells = assigned[assigned.cell_id != 'nan']
    return spots_in_cells.to_expression_matrix()
Exemplo n.º 5
0
def test_iss_pipeline_cropped_data(tmpdir):
    """Regression test for the ``ISS`` example module.

    Importing ``ISS`` executes the pipeline; this test then checks fixed
    windows of the filtered and registered images, the provenance log, the
    decoded gene counts, the number of watershed markers, target assignment,
    and a netCDF round trip of the assigned table.

    ``tmpdir`` is the directory in which the temporary netCDF file is created.
    """

    # set random seed to avoid errors provoked by optimization functions
    np.random.seed(777)

    # importing the module runs the pipeline and exposes its results as attributes
    iss = __import__('ISS')

    white_top_hat_filtered_image = iss.filtered_imgs

    # compare a fixed 10x10 window of the filtered image (index [2, 2, 0, 40:50, 40:50])
    # against previously recorded values
    expected_filtered_values = np.array(
        [[0.1041123, 0.09968718, 0.09358358, 0.09781034, 0.08943313, 0.08853284,
          0.08714428, 0.07518119, 0.07139697, 0.0585336, ],
         [0.09318685, 0.09127947, 0.0890364, 0.094728, 0.08799877, 0.08693064,
          0.08230717, 0.06738383, 0.05857938, 0.04223698],
         [0.08331426, 0.0812543, 0.08534371, 0.0894789, 0.09184404, 0.08274967,
          0.0732433, 0.05564965, 0.04577706, 0.03411917],
         [0.06741435, 0.07370108, 0.06511024, 0.07193103, 0.07333485, 0.07672236,
          0.06019684, 0.04415961, 0.03649958, 0.02737468],
         [0.05780118, 0.06402685, 0.05947966, 0.05598535, 0.05192646, 0.04870679,
          0.04164187, 0.03291371, 0.03030441, 0.02694743],
         [0.04649424, 0.06117342, 0.05899138, 0.05101091, 0.03639277, 0.03379873,
          0.03382925, 0.0282597, 0.02383459, 0.01651026],
         [0.0414435, 0.04603647, 0.05458152, 0.04969863, 0.03799496, 0.0325475,
          0.02928206, 0.02685588, 0.02172885, 0.01722743],
         [0.04107728, 0.04161135, 0.04798963, 0.05156023, 0.03952087, 0.02899214,
          0.02589456, 0.02824444, 0.01815823, 0.01557945],
         [0.03901731, 0.03302052, 0.03498893, 0.03929199, 0.03695735, 0.02943466,
          0.01945525, 0.01869231, 0.01666284, 0.01240558],
         [0.02664226, 0.02386511, 0.02206454, 0.02978561, 0.03265431, 0.0265507,
          0.02214084, 0.01844815, 0.01542687, 0.01353475]],
        dtype=np.float32
    )

    assert white_top_hat_filtered_image.xarray.dtype == np.float32

    assert np.allclose(
        expected_filtered_values,
        white_top_hat_filtered_image.xarray[2, 2, 0, 40:50, 40:50]
    )

    registered_image = iss.registered_imgs

    # same fixed window, this time taken from the registered image
    expected_registered_values = np.array(
        [[9.972601e-03, 4.410370e-03, 3.392192e-03, 1.687834e-03, 1.880155e-04,
          0.000000e+00, 1.047019e-04, 1.578360e-05, 1.069453e-03, 6.543968e-03],
         [1.456979e-02, 9.646147e-03, 8.203185e-03, 5.936079e-03, 1.839891e-03,
          3.569032e-04, 5.237808e-04, 3.792955e-04, 4.592746e-05, 1.088151e-03],
         [2.313178e-02, 1.586836e-02, 1.240375e-02, 9.513815e-03, 3.563545e-03,
          1.488329e-03, 1.326624e-03, 2.939297e-04, 5.607218e-04, 3.690171e-03],
         [3.531289e-02, 2.446796e-02, 1.964004e-02, 1.258251e-02, 7.771713e-03,
          4.918387e-03, 2.766922e-03, 3.267574e-04, 4.892451e-04, 5.261183e-03],
         [5.146676e-02, 3.794888e-02, 3.141785e-02, 2.312119e-02, 1.555709e-02,
          9.402979e-03, 6.135746e-03, 7.547007e-04, 1.231891e-03, 2.656648e-03],
         [5.952225e-02, 5.170041e-02, 4.459279e-02, 3.416265e-02, 2.403326e-02,
          1.659481e-02, 1.189285e-02, 4.377660e-03, 1.810592e-03, 1.729033e-03],
         [5.872828e-02, 5.881007e-02, 5.405803e-02, 4.143796e-02, 3.181438e-02,
          2.468321e-02, 1.451422e-02, 6.834699e-03, 6.021897e-03, 2.588449e-03],
         [4.815195e-02, 5.578594e-02, 5.535153e-02, 4.701486e-02, 3.499170e-02,
          2.584777e-02, 1.871042e-02, 1.036013e-02, 8.698075e-03, 2.945077e-03],
         [4.108098e-02, 4.543370e-02, 4.911040e-02, 4.965232e-02, 4.022935e-02,
          2.973786e-02, 1.956365e-02, 1.386791e-02, 8.811617e-03, 6.941982e-03],
         [3.560406e-02, 3.779930e-02, 4.068928e-02, 4.668610e-02, 4.536487e-02,
          3.364870e-02, 2.244582e-02, 1.683235e-02, 1.113740e-02, 1.012298e-02]],
        dtype=np.float32
    )

    assert np.allclose(
        expected_registered_values,
        registered_image.xarray[2, 2, 0, 40:50, 40:50]
    )

    # the image's provenance log must list the processing steps in this order
    pipeline_log = registered_image.log.data

    assert pipeline_log[0]['method'] == 'WhiteTophat'
    assert pipeline_log[1]['method'] == 'Warp'

    # decode
    decoded = iss.decoded

    # decoding identifies the 12 genes below, with the per-gene counts listed
    genes, gene_counts = iss.genes, iss.counts
    assert np.array_equal(genes, np.array(['ACTB', 'CD68', 'CTSL2', 'EPCAM',
                                           'ETV4', 'GAPDH', 'GUS', 'HER2', 'RAC1',
                                           'TFRC', 'TP53', 'VEGF']))

    assert np.array_equal(gene_counts, [19, 1, 5, 2, 1, 11, 1, 3, 2, 1, 1, 2])
    # 99 features in total were decoded
    assert decoded.sizes[Features.AXIS] == 99

    masks = iss.masks

    # segmentation is expected to produce 6 watershed markers
    assert len(iss.watershed_markers) == 6

    # assign targets
    lab = AssignTargets.Label()
    assigned = lab.run(masks, decoded)

    pipeline_log = assigned.get_log()

    # assert that physical coordinates were transferred
    assert Coordinates.X in assigned.coords
    assert Coordinates.Y in assigned.coords
    assert Coordinates.Z in assigned.coords

    # the assigned table's log must extend the image log with detection and decoding
    assert pipeline_log[0]['method'] == 'WhiteTophat'
    assert pipeline_log[1]['method'] == 'Warp'
    assert pipeline_log[2]['method'] == 'BlobDetector'
    assert pipeline_log[3]['method'] == 'PerRoundMaxChannel'

    # Test serialization / deserialization of IntensityTable log
    # (the temporary file is only used to reserve a name inside tmpdir; it is
    # closed, but not deleted, before the table is written to that path)
    with tempfile.NamedTemporaryFile(dir=tmpdir, delete=False) as ntf:
        tfp = ntf.name
    assigned.to_netcdf(tfp)
    loaded_intensities = IntensityTable.open_netcdf(tfp)
    pipeline_log = loaded_intensities.get_log()

    # the log must survive the netCDF round trip unchanged
    assert pipeline_log[0]['method'] == 'WhiteTophat'
    assert pipeline_log[1]['method'] == 'Warp'
    assert pipeline_log[2]['method'] == 'BlobDetector'
    assert pipeline_log[3]['method'] == 'PerRoundMaxChannel'

    # 28 of the spots are assigned to the cell with cell_id '1'
    # (although most spots do not decode!)
    assert np.sum(assigned['cell_id'] == '1') == 28
def _load_decoded_tables(input_loc, output_dir):
    """Load every ``*_decoded.cdf`` table under ``input_loc/cdf`` and create one output folder per FOV.

    Returns ``(results, keys)``: the loaded tables and the FOV key taken from each file name.
    """
    suffix = "_decoded.cdf"
    results = []
    keys = []
    # NOTE(review): files are sorted so the order is deterministic; the caller
    # pairs tables with masks by index, which assumes this order matches
    # ``exp.keys()`` -- confirm against the experiment layout.
    for f in sorted(glob("{}/cdf/*_decoded.cdf".format(input_loc))):
        results.append(DecodedIntensityTable.open_netcdf(f))
        # FOV key = file name without directory and without the "_decoded.cdf" suffix
        name = path.basename(f)[:-len(suffix)]
        print("found fov key: " + name)
        keys.append(name)
        print("loaded " + f)
        fov_dir = path.join(output_dir, name)
        if not path.isdir(fov_dir):
            makedirs(fov_dir)
            print("made " + fov_dir)
    return results, keys


def _build_masks(img_stack, roiKwargs, labeledKwargs, watershedKwargs):
    """Create masks with the first configured method: RoiSet, labeled images, then watershed."""
    if roiKwargs:
        print("applying Roi mask")
        return masksFromRoi(img_stack, **roiKwargs)
    if labeledKwargs:
        print("applying labeled image mask")
        return masksFromLabeledImages(img_stack, **labeledKwargs)
    if watershedKwargs:
        print("running basic threshold and watershed pipeline")
        return masksFromWatershed(img_stack, **watershedKwargs)
    raise ValueError("Parameters do not specify means of defining mask.")


def _segment_and_save(input_loc, exp_loc, output_dir, fov_count, aux_name,
                      roiKwargs, labeledKwargs, watershedKwargs):
    """Load decoded tables and images, build masks, and write all per-FOV outputs."""
    # read in netcdfs based on how we saved prev step
    results, keys = _load_decoded_tables(input_loc, output_dir)

    # load in the images we want to look at
    exp_json = str(exp_loc / "experiment.json")
    exp = starfish.core.experiment.experiment.Experiment.from_json(exp_json)
    print("loaded " + exp_json)

    img_stack = []
    for key in exp.keys():
        print("looking at " + key + ", " + aux_name)
        img_stack.append(exp[key].get_image(aux_name))

    # determine how we generate mask, then make it
    masks = _build_masks(img_stack, roiKwargs, labeledKwargs, watershedKwargs)

    # save masks to tiffs for later processing
    for i, mask in enumerate(masks):
        binmask = mask.to_label_image().xarray.values
        # collapse leading axes by summation until the array is 2-D
        while binmask.ndim > 2:
            binmask = np.sum(binmask, axis=0)
        binmask = binmask > 0
        PIL.Image.fromarray(binmask).save(
            path.join(output_dir, keys[i], "mask.tiff"))

    # apply mask to tables, save results
    al = AssignTargets.Label()
    for i in range(fov_count):
        fov_dir = path.join(output_dir, keys[i])
        labeled = al.run(masks[i], results[i])
        labeled.to_decoded_dataframe().save_csv(
            path.join(fov_dir, "segmentation.csv"))
        labeled.to_netcdf(path.join(fov_dir, "df_segmented.cdf"))
        # build the expression matrix once and reuse it for all three formats
        expression = labeled.to_expression_matrix()
        expression.to_pandas().to_csv(path.join(fov_dir, "exp_segmented.csv"))
        expression.save(path.join(fov_dir, "exp_segmented.cdf"))
        expression.save_anndata(path.join(fov_dir, "exp_segmented.h5ad"))
        print("saved fov key: {}, index {}".format(keys[i], i))


def run(
    input_loc: Path,
    exp_loc: Path,
    output_loc: str,
    fov_count: int,
    aux_name: str,
    roiKwargs: dict,
    labeledKwargs: dict,
    watershedKwargs: dict,
):
    """
    Main function for generating and applying masks then saving output.

    While the pipeline runs, ``sys.stdout`` and ``sys.stderr`` are redirected
    to a timestamped log file inside ``output_loc``; both streams are restored
    and the log file is closed when the function returns or raises.

    Parameters
    ----------
    input_loc: Path
        Location of input cdf files, as formatted by starfishRunner.cwl
    exp_loc: Path
        Directory that contains "experiment.json" file for the experiment.
    output_loc: str
        Path to directory where output will be saved.
    fov_count: int
        The number of FOVs in the experiment.
    aux_name: str
        The name of the auxiliary view to look at for image segmentation.
    roiKwargs: dict
        Dictionary with arguments for reading in masks from an RoiSet. See masksFromRoi.
    labeledKwargs: dict
        Dictionary with arguments for reading in masks from a labeled image. See masksFromLabeledImages.
    watershedKwargs: dict
        Dictionary with arguments for running basic watershed pipeline. See masksFromWatershed.

    Raises
    ------
    ValueError
        If ``roiKwargs``, ``labeledKwargs`` and ``watershedKwargs`` are all empty.
    """
    # The body previously read an undefined ``output_dir``; use the documented parameter.
    output_dir = str(output_loc)
    makedirs(output_dir, exist_ok=True)

    # disabling tdqm for pipeline runs
    tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)

    # redirecting output to log
    log_name = datetime.now().strftime("%Y%m%d_%H%M_starfish_segmenter.log")
    previous_stdout, previous_stderr = sys.stdout, sys.stderr
    with open(path.join(output_dir, log_name), "w") as reporter:
        sys.stdout = reporter
        sys.stderr = reporter
        try:
            _segment_and_save(input_loc, exp_loc, output_dir, fov_count,
                              aux_name, roiKwargs, labeledKwargs,
                              watershedKwargs)
        except Exception:
            # keep the failure in the log file before the streams are restored
            import traceback
            traceback.print_exc(file=reporter)
            raise
        finally:
            sys.stdout = previous_stdout
            sys.stderr = previous_stderr