def test_concatenate_two_expression_matrices():
    """Concatenating matrices with different gene sets pads the gaps with NaN.

    The first matrix covers genes "x" and "y", the second only gene "x".
    The expected result stacks the cells of both inputs (four rows) and holds
    NaN in the "y" column for every cell that came from the second matrix.
    """
    axis_names = [Features.CELLS, Features.GENES]
    cell_ids = [0, 1]

    def build(values, gene_names):
        # Both inputs share the same cell labels and differ only in genes.
        return ExpressionMatrix(
            np.array(values),
            dims=axis_names,
            coords=[(Features.CELLS, cell_ids), (Features.GENES, gene_names)],
        )

    two_gene = build([[0, 1], [1, 0]], ["x", "y"])
    one_gene = build([[0], [1]], ["x"])

    result = concatenate([two_gene, one_gene])

    # assert_equal treats NaN == NaN as a match, so the padding can be
    # compared directly.
    np.testing.assert_equal(
        result.values,
        np.array([
            [0, 1],
            [1, 0],
            [0, np.nan],
            [1, np.nan],
        ]),
    )
def to_expression_matrix(self) -> ExpressionMatrix:
    """
    Generates a cell x gene count matrix where each cell is annotated with spatial metadata.

    Requires that spots in the IntensityTable have been assigned to cells.

    Each cell's position is reported as the midpoint between the minimum and
    maximum coordinate of its spots along every axis (pixel axes always,
    physical axes when ``self.has_physical_coords`` is true). Cell area is not
    computed here and is filled with NaN. Spots whose target label is the
    string ``'nan'`` are not counted as a gene; their per-cell totals are
    stored in the ``number_of_undecoded_spots`` coordinate instead.

    Returns
    -------
    ExpressionMatrix :
        cell x gene expression table

    Raises
    ------
    KeyError :
        if the table has no ``Features.CELL_ID`` coordinate.
    """
    if Features.CELL_ID not in self.coords:
        raise KeyError(
            "IntensityTable must have 'cell_id' assignments for each cell before this function "
            "can be called. See starfish.spots.AssignTargets.Label.")

    # Convert to a dataframe once; both the counting and the position
    # summaries below read from the same table.
    features = self.to_features_dataframe()

    # Count spots per (cell, target) pair, then pivot targets into columns.
    # Any column works for count(); the first one is taken. Pairs that never
    # occur become 0 rather than NaN.
    counts = (
        features
        .groupby([Features.CELL_ID, Features.TARGET])
        .count()
        .iloc[:, 0]
        .unstack()
        .fillna(0)
    )

    # rename unassigned spots
    counts.rename(index={'nan': 'unassigned'}, inplace=True)

    # remove and store 'nan' target counts
    nan_target_counts = np.zeros(counts.shape[0])
    if 'nan' in counts.columns:
        nan_target_counts = counts['nan'].values
        counts.drop(columns='nan', inplace=True)

    # Pixel-space axes are always summarised; physical axes only when present.
    position_columns = [Axes.X.value, Axes.Y.value, Axes.ZPLANE.value]
    if self.has_physical_coords:
        position_columns += [
            Coordinates.X.value, Coordinates.Y.value, Coordinates.Z.value
        ]

    per_cell = features.groupby([Features.CELL_ID])[position_columns]
    min_ = per_cell.min()
    max_ = per_cell.max()
    # Midpoint of each cell's bounding box along every summarised axis.
    coordinate_df = min_ + (max_ - min_) / 2

    # The values are attached positionally along the cells dimension; this
    # relies on both groupings above yielding cells in the same order.
    metadata = {
        name: (Features.CELLS, data.values)
        for name, data in coordinate_df.items()
    }
    # Area is unknown at this point, so store a NaN placeholder per cell.
    metadata[Features.AREA] = (
        Features.CELLS, np.full(counts.shape[0], fill_value=np.nan))
    metadata["number_of_undecoded_spots"] = (Features.CELLS, nan_target_counts)

    # add genes to the metadata
    metadata[Features.GENES] = counts.columns.values
    metadata[Features.CELL_ID] = (Features.CELLS, counts.index.values)

    return ExpressionMatrix(
        data=counts.values,
        dims=(Features.CELLS, Features.GENES),
        coords=metadata,
        name='expression_matrix',
    )