Exemplo n.º 1
0
def test_concatenate_two_expression_matrices():
    """Check the values produced by concatenating two matrices with different gene sets.

    The first matrix has genes "x" and "y", the second only "x". The test asserts that the
    concatenated values stack the cells of both inputs and hold NaN for gene "y" in the rows
    that come from the second matrix.
    """
    axes = [Features.CELLS, Features.GENES]

    two_gene_matrix = ExpressionMatrix(
        np.array([[0, 1], [1, 0]]),
        dims=axes,
        coords=[(Features.CELLS, [0, 1]), (Features.GENES, ["x", "y"])],
    )
    one_gene_matrix = ExpressionMatrix(
        np.array([[0], [1]]),
        dims=axes,
        coords=[(Features.CELLS, [0, 1]), (Features.GENES, ["x"])],
    )

    result = concatenate([two_gene_matrix, one_gene_matrix])

    # assert_equal treats NaN entries in matching positions as equal.
    np.testing.assert_equal(
        result.values,
        np.array([[0, 1], [1, 0], [0, np.nan], [1, np.nan]]),
    )
Exemplo n.º 2
0
    def to_expression_matrix(self) -> ExpressionMatrix:
        """
        Generates a cell x gene count matrix where each cell is annotated with spatial metadata.

        Requires that spots in the IntensityTable have been assigned to cells.

        Each entry of the matrix is the number of features that share one (cell id, target)
        pair. Features whose target is the string 'nan' are removed from the gene axis and
        reported per cell in the ``number_of_undecoded_spots`` coordinate, and the cell id
        'nan' is relabelled 'unassigned'. Each position coordinate of a cell is the midpoint
        between the minimum and maximum value over the features of that cell. The area
        coordinate is filled with NaN.

        Returns
        -------
        ExpressionMatrix :
            cell x gene expression table

        Raises
        ------
        KeyError :
            if the IntensityTable has no cell id coordinate
        """
        if Features.CELL_ID not in self.coords:
            raise KeyError(
                "IntensityTable must have 'cell_id' assignments for each cell before this function "
                "can be called. See starfish.spots.AssignTargets.Label.")

        # Build the per-feature dataframe once; both aggregations below read from it.
        # NOTE(review): assumes to_features_dataframe() has no side effects -- confirm.
        features = self.to_features_dataframe()

        # count() tallies the non-null entries of every remaining column per group; the first
        # column is used as the spot count. unstack() pivots targets into columns and leaves
        # NaN for (cell, target) pairs that never occur, which are then set to 0.
        counts = (
            features.groupby([Features.CELL_ID, Features.TARGET])
            .count()
            .iloc[:, 0]
            .unstack()
            .fillna(0)
        )

        # rename unassigned spots
        counts = counts.rename(index={'nan': 'unassigned'})

        # remove and store 'nan' target counts
        if 'nan' in counts.columns:
            nan_target_counts = counts['nan'].values
            counts = counts.drop(columns='nan')
        else:
            nan_target_counts = np.zeros(counts.shape[0])

        # Pixel-space columns are always summarised; physical-space columns only when present.
        position_columns = [Axes.X.value, Axes.Y.value, Axes.ZPLANE.value]
        if self.has_physical_coords:
            position_columns += [
                Coordinates.X.value, Coordinates.Y.value, Coordinates.Z.value
            ]

        # Grouped on the same cell id key as `counts`; only the values are kept below, so the
        # rows are assumed to come out in the same cell order as `counts`.
        per_cell = features.groupby([Features.CELL_ID])[position_columns]
        lower = per_cell.min()
        upper = per_cell.max()
        coordinate_df = lower + (upper - lower) / 2

        metadata = {
            name: (Features.CELLS, column.values)
            for name, column in coordinate_df.items()
        }
        # No area is computed here; the coordinate is created with NaN placeholders.
        metadata[Features.AREA] = (Features.CELLS,
                                   np.full(counts.shape[0], fill_value=np.nan))
        metadata["number_of_undecoded_spots"] = (Features.CELLS,
                                                 nan_target_counts)
        # add genes to the metadata
        metadata[Features.GENES] = counts.columns.values
        metadata[Features.CELL_ID] = (Features.CELLS, counts.index.values)

        return ExpressionMatrix(data=counts.values,
                                dims=(Features.CELLS, Features.GENES),
                                coords=metadata,
                                name='expression_matrix')