示例#1
0
def expression_patterns(ds: loompy.LoomConnection, labels: np.ndarray, pep: float, f: float, cells: np.ndarray = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
	"""
	Derive enrichment and trinary scores for all genes

	Args:
		ds (LoomConnection):	Dataset
		labels (numpy array):	Cluster labels (one per cell), used as non-negative integer indices
		pep (float):			Desired posterior error probability
		f (float):				Fraction required for a gene to be considered 'expressed'
		cells (numpy array):	Indices of cells to include

	Returns:
		scores1 (numpy 2d array):		Array of (n_genes, n_labels); mean of the gene within the cluster
										divided by the mean of the gene over all scanned cells
		scores2 (numpy 2d array):		Array of (n_genes, n_labels); number of cells in the cluster with
										a nonzero value for the gene
		trinary_prob (numpy 2d array):	Array of (n_genes, n_labels); first output of
										betabinomial_trinarize_array
		trinary_pat (numpy 2d array):	Array of (n_genes, n_labels); second output of
										betabinomial_trinarize_array

	Remarks:
		If the cells argument is provided, the labels should include only those cells. That is,
		labels.shape[0] == cells.shape[0].

		n_labels is np.max(labels) + 1. Labels with no cells get a score of zero, and genes whose
		overall mean or overall nonzero count is zero get zero scores for every label.

		Amit says,
		regarding marker genes.
		i usually rank the genes by some kind of enrichment score.
		score1 = mean of gene within the cluster / mean of gene in all cells
		score2 = fraction of positive cells within cluster

		enrichment score = score1 * score2^power   (where power == 0.5 or 1) i usually use 1 for 10x data

		NOTE(review): scores2 as computed here is a raw count of positive cells, not the fraction
		described above; normalization was disabled in the code. Confirm which one callers expect.
	"""

	n_labels = np.max(labels) + 1

	scores1 = np.empty((ds.shape[0], n_labels))
	scores2 = np.empty((ds.shape[0], n_labels))
	trinary_pat = np.empty((ds.shape[0], n_labels))
	trinary_prob = np.empty((ds.shape[0], n_labels))

	# Cluster membership depends only on the labels, not on the gene, so resolve the
	# cell indices of every label once instead of once per gene
	members = [np.where(labels == lbl)[0] for lbl in range(n_labels)]

	for (_, selection, vals) in ds.batch_scan(cells=cells, genes=None, axis=0):
		# selection holds the dataset row of each row in this batch
		for j, row in enumerate(selection):
			data = vals[j, :]
			mu0 = np.mean(data)
			f0 = np.count_nonzero(data)
			score1 = np.zeros(n_labels)
			score2 = np.zeros(n_labels)
			# A gene with zero overall mean or no nonzero values keeps all-zero scores
			if mu0 != 0 and f0 != 0:
				for lbl, idx in enumerate(members):
					if idx.shape[0] == 0:
						# Empty cluster: leave the scores at zero
						continue
					sel = data[idx]
					score1[lbl] = np.mean(sel) / mu0
					# Raw count of positive cells (not divided by f0 or by cluster size)
					score2[lbl] = np.count_nonzero(sel)
			scores1[row, :] = score1
			scores2[row, :] = score2
			trinary_prob[row, :], trinary_pat[row, :] = betabinomial_trinarize_array(data, labels, pep, f)
	return (scores1, scores2, trinary_prob, trinary_pat)
示例#2
0
    def fit(self, ds: loompy.LoomConnection) -> np.ndarray:
        """Compute per-row, per-cluster trinarization values for the dataset.

        Only cells whose "Clusters" column attribute is >= 0 are used. The
        number of labels is the largest such cluster label plus one.

        Side effects:
            Sets ``self.genes`` to ``ds.ra.Gene`` and ``self.trinary_prob`` to
            a newly allocated array of shape ``(ds.shape[0], n_labels)``.

        Args:
            ds: Dataset to scan row by row.

        Returns:
            ``self.trinary_prob``, where each row holds the result of
            ``self._betabinomial_trinarize_array`` for the rounded values of
            that dataset row.
        """
        clusters = ds.col_attrs["Clusters"]
        is_clustered = clusters >= 0
        cells = np.nonzero(is_clustered)[0]
        labels = clusters[cells]

        n_labels = np.max(labels) + 1
        logging.info("n_labels %d", n_labels)

        n_rows = ds.shape[0]
        self.trinary_prob = np.empty((n_rows, n_labels))
        self.genes = ds.ra.Gene

        scanner = ds.batch_scan(cells=cells, genes=None, axis=0)
        for (_, selection, vals) in scanner:
            # selection holds the dataset row of each row in this batch
            for offset, gene_row in enumerate(selection):
                rounded = np.round(vals[offset, :])
                result = self._betabinomial_trinarize_array(
                    rounded, labels, self.f, n_labels)
                self.trinary_prob[gene_row, :] = result

        return self.trinary_prob