Пример #1
0
def test_center_weighting_extraction():
    """Check that center-weighted extraction returns lower counts than default extraction.

    Two synthetic cells are generated, their pixel coordinates and centroids are read
    from regionprops, and the per-channel counts returned by
    ``signal_extraction.center_weighting_extraction`` are compared against those
    returned by ``signal_extraction.default_extraction`` for the same pixels.
    """
    # sample params
    size_img = (1024, 1024)
    cell_radius = 10
    nuc_radius = 3
    memb_thickness = 5
    nuc_signal_strength = 10
    memb_signal_strength = 10
    nuc_uncertainty_length = 1
    memb_uncertainty_length = 1

    # generate sample segmentation mask and channel data
    sample_segmentation_mask, sample_channel_data = \
        synthetic_spatial_datagen.generate_two_cell_chan_data(
            size_img=size_img,
            cell_radius=cell_radius,
            nuc_radius=nuc_radius,
            memb_thickness=memb_thickness,
            nuc_signal_strength=nuc_signal_strength,
            memb_signal_strength=memb_signal_strength,
            nuc_uncertainty_length=nuc_uncertainty_length,
            memb_uncertainty_length=memb_uncertainty_length
        )

    # wrap the channel data once; all four extraction calls read the same image
    image_data = xr.DataArray(sample_channel_data)

    # key the regions by label so cells 1 and 2 are selected explicitly instead of
    # by their position in the list returned by regionprops
    regions = {
        region.label: region
        for region in regionprops(sample_segmentation_mask.astype(np.int16))
    }
    cell_1 = regions[1]
    cell_2 = regions[2]

    # centroids and pixel coords both come from regionprops
    centroid_1 = cell_1.centroid
    centroid_2 = cell_2.centroid
    coords_1 = cell_1.coords
    coords_2 = cell_2.coords

    channel_counts_1_center_weight = signal_extraction.center_weighting_extraction(
        cell_coords=coords_1,
        image_data=image_data,
        centroid=centroid_1)

    channel_counts_2_center_weight = signal_extraction.center_weighting_extraction(
        cell_coords=coords_2,
        image_data=image_data,
        centroid=centroid_2)

    channel_counts_1_base_weight = signal_extraction.default_extraction(
        cell_coords=coords_1, image_data=image_data)

    channel_counts_2_base_weight = signal_extraction.default_extraction(
        cell_coords=coords_2, image_data=image_data)

    # cell 1's first channel and cell 2's second channel should both be lower
    # for weighted than default
    assert channel_counts_1_center_weight[0] < channel_counts_1_base_weight[0]
    assert channel_counts_2_center_weight[1] < channel_counts_2_base_weight[1]

    # assert effect of "bleeding" membrane signal is less with weighted than default
    assert channel_counts_1_center_weight[1] < channel_counts_1_base_weight[1]
def test_default_extraction():
    """Check the per-channel counts returned by default extraction for two synthetic cells.

    Default extraction is expected to simply sum each channel over the pixels of a cell.
    """
    # configure your parameters here
    datagen_settings = {
        'size_img': (1024, 1024),
        'cell_radius': 10,
        'nuc_radius': 3,
        'memb_thickness': 5,
        'nuc_signal_strength': 10,
        'memb_signal_strength': 10,
        'nuc_uncertainty_length': 0,
        'memb_uncertainty_length': 0,
    }

    # generate sample segmentation mask and channel data
    sample_segmentation_mask, sample_channel_data = \
        synthetic_spatial_datagen.generate_two_cell_test_channel_synthetic_data(
            **datagen_settings)

    # run default extraction on the pixels labelled 1 and 2 in the mask
    counts_by_cell = {
        cell_label: signal_extraction.default_extraction(
            cell_coords=np.argwhere(sample_segmentation_mask == cell_label),
            image_data=xr.DataArray(sample_channel_data))
        for cell_label in (1, 2)
    }

    # cell 1 is expected to carry signal only in the first channel
    assert np.all(counts_by_cell[1] == [250, 0])

    # cell 2's total is higher because its signal is membrane-level expression
    assert np.all(counts_by_cell[2] == [0, 2360])
Пример #3
0
def test_default_extraction():
    """Verify the channel counts that default extraction reports for cells 1 and 2."""
    # generate sample segmentation mask and channel data from the sample params
    sample_segmentation_mask, sample_channel_data = \
        synthetic_spatial_datagen.generate_two_cell_chan_data(
            size_img=(1024, 1024),
            cell_radius=10,
            nuc_radius=3,
            memb_thickness=5,
            nuc_signal_strength=10,
            memb_signal_strength=10,
            nuc_uncertainty_length=0,
            memb_uncertainty_length=0
        )

    def extract_counts(cell_label):
        # pixels of the requested cell, as (row, col) pairs taken from the mask
        cell_pixels = np.argwhere(sample_segmentation_mask == cell_label)
        return signal_extraction.default_extraction(
            cell_coords=cell_pixels, image_data=xr.DataArray(sample_channel_data))

    counts_cell_1 = extract_counts(1)
    counts_cell_2 = extract_counts(2)

    # test signal counts for different channels
    assert np.all(counts_cell_1 == [250, 0])
    assert np.all(counts_cell_2 == [0, 2360])
Пример #4
0
def compute_marker_counts(input_images,
                          segmentation_masks,
                          nuclear_counts=False):
    """Tabulate per-cell channel counts and morphology metrics for a single point

        Args:
            input_images (xarray): rows x columns x channels matrix of imaging data
            segmentation_masks (xarray): rows x columns x compartment matrix of masks; it is
                read through .values, .loc and .compartments and must hold a 'whole_cell'
                compartment (plus 'nuclear' when nuclear_counts is set)
            nuclear_counts (bool): boolean flag to determine whether nuclear counts are returned

        Returns:
            marker_counts (xarray): compartments x cell_id x features array; the features are
                'cell_size', one column per channel, then the morphology metrics
    """

    # properties requested from regionprops; 'coords' is kept last because it is only
    # used to index pixels and is left out of the returned features
    requested_props = [
        "label", "area", "eccentricity", "major_axis_length",
        "minor_axis_length", "perimeter", 'coords'
    ]
    morphology_names = requested_props[:-1]

    # one output row per unique value found in the first compartment of the mask
    cell_ids = np.unique(segmentation_masks[..., 0].values)

    # column labels: cell size, channel counts, morphology metrics
    feature_names = np.concatenate(
        (np.array('cell_size'), input_images.channels, morphology_names),
        axis=None)

    # zero-filled compartment x cell x feature container
    container_shape = (len(segmentation_masks.compartments),
                       len(cell_ids), len(feature_names))
    marker_counts = xr.DataArray(np.zeros(container_shape),
                                 coords=[
                                     segmentation_masks.compartments,
                                     cell_ids.astype('int'),
                                     feature_names
                                 ],
                                 dims=['compartments', 'cell_id', 'features'])

    # first column holds the size, everything from the second column on holds the
    # channel counts followed by the morphology metrics
    size_feature = marker_counts.features[0]
    first_count_feature = marker_counts.features[1]

    # regionprops for the whole-cell mask, and for the nuclear mask if requested
    whole_cell_mask = segmentation_masks.loc[:, :, 'whole_cell'].values
    cell_props = pd.DataFrame(
        regionprops_table(whole_cell_mask, properties=requested_props))

    if nuclear_counts:
        nuc_mask = segmentation_masks.loc[:, :, 'nuclear'].values
        nuc_props = pd.DataFrame(
            regionprops_table(nuc_mask, properties=requested_props))

    # TODO: the whole-cell and nuclear branches repeat the same steps, maybe worth refactoring
    for cell_id in cell_props['label']:
        is_current_cell = cell_props['label'] == cell_id

        # pixel coordinates of the current cell
        cell_coords = cell_props.loc[is_current_cell, 'coords'].values[0]

        # total signal intensity within the cell, followed by its morphology metrics
        cell_counts = signal_extraction.default_extraction(
            cell_coords, input_images)
        cell_morphology = cell_props.loc[is_current_cell, morphology_names]
        cell_features = np.concatenate((cell_counts, cell_morphology),
                                       axis=None)

        # write counts and morphology, then the cell size (number of pixels)
        marker_counts.loc['whole_cell', cell_id,
                          first_count_feature:] = cell_features
        marker_counts.loc['whole_cell', cell_id,
                          size_feature] = cell_coords.shape[0]

        if not nuclear_counts:
            continue

        # id of the nucleus corresponding to this cell
        nuc_id = segmentation_utils.find_nuclear_mask_id(
            nuc_segmentation_mask=nuc_mask, cell_coords=cell_coords)

        if nuc_id is None:
            # no nucleus found within this cell
            continue

        is_current_nuc = nuc_props['label'] == nuc_id

        # pixel coordinates of the corresponding nucleus
        nuc_coords = nuc_props.loc[is_current_nuc, 'coords'].values[0]

        # nuclear signal, followed by the morphology metrics of the nucleus
        nuc_counts = signal_extraction.default_extraction(
            nuc_coords, input_images)
        nuc_morphology = nuc_props.loc[is_current_nuc, morphology_names]
        nuc_features = np.concatenate((nuc_counts, nuc_morphology),
                                      axis=None)

        # nuclear values are stored under the id of the enclosing cell
        marker_counts.loc['nuclear', cell_id,
                          first_count_feature:] = nuc_features
        marker_counts.loc['nuclear', cell_id,
                          size_feature] = nuc_coords.shape[0]

    return marker_counts
Пример #5
0
def _extract_object_features(props, label, morphology_columns, input_images):
    """Look up one labelled object and build its feature vector.

    Args:
        props (pandas.DataFrame):
            regionprops table holding a 'label' column, a 'coords' column and
            the requested morphology columns
        label (int):
            label of the object to look up in props
        morphology_columns (list):
            names of the morphology columns appended after the channel counts
        input_images (xarray.DataArray):
            imaging data handed to signal_extraction.default_extraction

    Returns:
        tuple:
            the coords entry of the object, and the flattened concatenation of
            its channel counts and morphology values
    """
    is_current = props['label'] == label
    coords = props.loc[is_current, 'coords'].values[0]

    # total signal intensity within the object
    counts = signal_extraction.default_extraction(coords, input_images)

    # morphology metrics of the same object
    morphology = props.loc[is_current, morphology_columns]

    return coords, np.concatenate((counts, morphology), axis=None)


def compute_marker_counts(input_images,
                          segmentation_masks,
                          nuclear_counts=False,
                          regionprops_features=None,
                          split_large_nuclei=False):
    """Extract single cell protein expression data from channel TIFs for a single fov

    Args:
        input_images (xarray.DataArray):
            rows x columns x channels matrix of imaging data
        segmentation_masks (xarray.DataArray):
            rows x columns x compartment matrix of masks. It is read through .values,
            .loc and .compartments and must hold a 'whole_cell' compartment (plus
            'nuclear' when nuclear_counts is set)
        nuclear_counts (bool):
            boolean flag to determine whether nuclear counts are returned
        regionprops_features (list):
            morphology features for regionprops to extract for each cell. Must include
            'label', which is used to look cells up. 'coords' is added internally when
            missing. The list passed by the caller is not modified
        split_large_nuclei (bool):
            controls whether nuclei which have portions outside of the cell will get relabeled

    Returns:
        xarray.DataArray:
            compartments x cell_id x features array. The features are 'cell_size',
            one column per channel, then the morphology metrics
    """

    if regionprops_features is None:
        regionprops_features = [
            'label', 'area', 'eccentricity', 'major_axis_length',
            'minor_axis_length', 'perimeter', 'centroid'
        ]
    else:
        # work on a private copy so 'coords' is never appended to the caller's list
        regionprops_features = list(regionprops_features)

    if 'coords' not in regionprops_features:
        regionprops_features.append('coords')

    # names of the returned columns only: 'coords' is used to index pixels, not returned
    regionprops_names = [
        name for name in regionprops_features if name != 'coords'
    ]

    # centroid returns two columns, need to modify names
    if 'centroid' in regionprops_names:
        regionprops_names.remove('centroid')
        regionprops_names += ['centroid-0', 'centroid-1']

    # ids present in the first compartment, with the 0 label dropped
    unique_cell_ids = np.unique(segmentation_masks[..., 0].values)
    unique_cell_ids = unique_cell_ids[np.nonzero(unique_cell_ids)]

    # create labels for array holding channel counts and morphology metrics
    feature_names = np.concatenate(
        (np.array('cell_size'), input_images.channels, regionprops_names),
        axis=None)

    # zero-filled array holding compartment x cell x feature info
    marker_counts = xr.DataArray(
        np.zeros((len(segmentation_masks.compartments),
                  len(unique_cell_ids), len(feature_names))),
        coords=[
            segmentation_masks.compartments,
            unique_cell_ids.astype('int'),
            feature_names
        ],
        dims=['compartments', 'cell_id', 'features'])

    # get regionprops for each cell
    cell_props = pd.DataFrame(
        regionprops_table(segmentation_masks.loc[:, :, 'whole_cell'].values,
                          properties=regionprops_features))

    if nuclear_counts:
        nuc_mask = segmentation_masks.loc[:, :, 'nuclear'].values

        if split_large_nuclei:
            cell_mask = segmentation_masks.loc[:, :, 'whole_cell'].values
            nuc_mask = segmentation_utils.split_large_nuclei(
                cell_segmentation_mask=cell_mask,
                nuc_segmentation_mask=nuc_mask,
                cell_ids=unique_cell_ids)

        nuc_props = pd.DataFrame(
            regionprops_table(nuc_mask, properties=regionprops_features))

    # loop through each cell in mask
    for cell_id in cell_props['label']:
        cell_coords, cell_features = _extract_object_features(
            cell_props, cell_id, regionprops_names, input_images)

        # add counts of each marker and morphology metrics to appropriate columns
        marker_counts.loc['whole_cell', cell_id,
                          marker_counts.features[1]:] = cell_features

        # add cell size (number of pixels) to first column
        marker_counts.loc['whole_cell', cell_id,
                          marker_counts.features[0]] = cell_coords.shape[0]

        if not nuclear_counts:
            continue

        # get id of corresponding nucleus
        nuc_id = segmentation_utils.find_nuclear_mask_id(
            nuc_segmentation_mask=nuc_mask, cell_coords=cell_coords)

        if nuc_id is None:
            # no nucleus found within this cell; its nuclear row keeps the initial zeros
            continue

        nuc_coords, nuc_features = _extract_object_features(
            nuc_props, nuc_id, regionprops_names, input_images)

        # nuclear values are stored under the id of the enclosing cell
        marker_counts.loc['nuclear', cell_id,
                          marker_counts.features[1]:] = nuc_features

        # add nucleus size (number of pixels) to first column
        marker_counts.loc['nuclear', cell_id, marker_counts.features[0]] = \
            nuc_coords.shape[0]

    return marker_counts
def test_center_weighting_extraction():
    """Check that center-weighted extraction returns lower counts than default extraction.

    Center weighting extraction applies a weighting scheme with more confidence
    toward the center before summing across each channel. The counts it returns
    for two synthetic cells are compared against those of default extraction.
    """
    # configure your parameters here
    size_img = (1024, 1024)
    cell_radius = 10
    nuc_radius = 3
    memb_thickness = 5
    nuc_signal_strength = 10
    memb_signal_strength = 10
    nuc_uncertainty_length = 1
    memb_uncertainty_length = 1

    # generate sample segmentation mask and channel data
    sample_segmentation_mask, sample_channel_data = \
        synthetic_spatial_datagen.generate_two_cell_test_channel_synthetic_data(
            size_img=size_img,
            cell_radius=cell_radius,
            nuc_radius=nuc_radius,
            memb_thickness=memb_thickness,
            nuc_signal_strength=nuc_signal_strength,
            memb_signal_strength=memb_signal_strength,
            nuc_uncertainty_length=nuc_uncertainty_length,
            memb_uncertainty_length=memb_uncertainty_length)

    # wrap the channel data once; all four extraction calls read the same image
    image_data = xr.DataArray(sample_channel_data)

    # generate region info using regionprops, keyed by label so cells 1 and 2 are
    # selected explicitly instead of by their position in the returned list
    regions = {
        region.label: region
        for region in regionprops(sample_segmentation_mask.astype(np.int16))
    }
    cell_1 = regions[1]
    cell_2 = regions[2]

    # centroids and coords are both taken from regionprops to keep them standardized
    centroid_1 = cell_1.centroid
    centroid_2 = cell_2.centroid
    coords_1 = cell_1.coords
    coords_2 = cell_2.coords

    channel_counts_1_center_weight = signal_extraction.center_weighting_extraction(
        cell_coords=coords_1,
        image_data=image_data,
        centroid=centroid_1)

    channel_counts_2_center_weight = signal_extraction.center_weighting_extraction(
        cell_coords=coords_2,
        image_data=image_data,
        centroid=centroid_2)

    channel_counts_1_base_weight = signal_extraction.default_extraction(
        cell_coords=coords_1, image_data=image_data)

    channel_counts_2_base_weight = signal_extraction.default_extraction(
        cell_coords=coords_2, image_data=image_data)

    # assert that the nuclear signal for cell 1 is lower for weighted than for base
    # same for membrane signal for cell 2
    assert channel_counts_1_center_weight[0] < channel_counts_1_base_weight[0]
    assert channel_counts_2_center_weight[1] < channel_counts_2_base_weight[1]

    # we intentionally bled membrane signal from cell 2 into cell 1
    # a weighted signal technique should curb this bleeding, so less membrane
    # noise is expected in cell 1 with weighting than without
    assert channel_counts_1_center_weight[1] < channel_counts_1_base_weight[1]