Example #1
0
    def fit(
        self, ds: loompy.LoomConnection
    ) -> Tuple[sparse.coo_matrix, sparse.coo_matrix, np.ndarray]:
        """
		Discover the manifold

		Returns:
			knn		The knn graph as a sparse matrix
			mknn	Mutual knn subgraph
			pos		2D projection (gt-SNE) as ndarray with shape (n_cells, 2)
		"""
        n_cells = ds.shape[1]
        logging.info("Processing all %d cells", n_cells)
        logging.info("Validating genes")
        nnz = ds.map([np.count_nonzero], axis=0)[0]
        valid_genes = np.logical_and(nnz > 5,
                                     nnz < ds.shape[1] * 0.5).astype("int")
        ds.ra._Valid = valid_genes
        logging.info("%d of %d genes were valid", np.sum(ds.ra._Valid == 1),
                     ds.shape[0])

        logging.info("Normalization")
        normalizer = cg.Normalizer(False)
        normalizer.fit(ds)
        logging.info("Selecting up to %d genes", self.n_genes)
        genes = cg.FeatureSelection(self.n_genes).fit(ds,
                                                      mu=normalizer.mu,
                                                      sd=normalizer.sd)

        logging.info("Loading data for selected genes")
        data = np.zeros((n_cells, genes.shape[0]))
        for (ix, selection, view) in ds.scan(axis=1):
            data[selection - ix, :] = view[genes, :].T

        logging.info("Computing initial subspace KNN")
        subspaces = np.ones(data.shape)
        knn = subspace_knn_graph(data, subspaces)
        mknn = knn.minimum(knn.transpose()).tocoo()

        for t in range(5):
            logging.info(f"Refining subspace KNN (iteration {t + 1})")

            logging.info("Louvain clustering")
            graph = nx.from_scipy_sparse_matrix(mknn)
            partitions = community.best_partition(graph)
            labels = np.array(
                [partitions[key] for key in range(mknn.shape[0])])
            ds.ca.Clusters = labels
            n_labels = np.max(labels) + 1
            logging.info(f"Found {n_labels} clusters")

            logging.info("Marker selection")
            (_, enrichment, _) = cg.MarkerSelection(n_markers=10,
                                                    findq=False).fit(ds)
            subspaces = np.zeros(data.shape)
            for ix in range(enrichment.shape[1]):
                for j in range(n_cells):
                    subspaces[j,
                              np.argsort(-enrichment[:, ix])[:self.n_genes //
                                                             n_labels]] = 1
            knn = subspace_knn_graph(data, subspaces)
            mknn = knn.minimum(knn.transpose()).tocoo()

        perplexity = min(self.k, (n_cells - 1) / 3 - 1)
        logging.info("gt-SNE layout")
        # Note that perplexity argument is ignored in this case, but must still be given
        # because bhtsne will check that it has a valid value
        tsne_pos = cg.TSNE(perplexity=perplexity).layout(data, knn=knn.tocsr())

        return (knn, mknn, tsne_pos)
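Note on Example #1: subspace_knn_graph is called but not defined in the snippet. A minimal sketch of what such a helper might do, assuming it finds each cell's nearest neighbours using only the feature columns flagged in that cell's subspace mask (an illustrative assumption, not the actual cytograph implementation; the name subspace_knn_graph_sketch, the brute-force distance loop, and the default k are mine):

import numpy as np
from scipy import sparse

def subspace_knn_graph_sketch(data: np.ndarray, subspaces: np.ndarray, k: int = 50) -> sparse.coo_matrix:
    # data: (n_cells, n_features); subspaces: binary mask of the same shape
    n_cells = data.shape[0]
    rows, cols = [], []
    for i in range(n_cells):
        mask = subspaces[i] > 0
        # Euclidean distance from cell i to all cells, restricted to cell i's subspace
        d = np.linalg.norm(data[:, mask] - data[i, mask], axis=1)
        d[i] = np.inf  # exclude self
        nearest = np.argpartition(d, k)[:k]
        rows.extend([i] * k)
        cols.extend(nearest.tolist())
    vals = np.ones(len(rows))
    return sparse.coo_matrix((vals, (rows, cols)), shape=(n_cells, n_cells))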
Example #2
0
	def fit(self, ds: loompy.LoomConnection) -> Tuple[sparse.coo_matrix, sparse.coo_matrix, np.ndarray]:
		"""
		Discover the manifold
		Args:
			n_genes		Number of genes to use for manifold learning (ignored if genes is not None)
			gtsne		Use graph t-SNE for layout (default: standard t-SNE)
			alpha		The scale parameter for multiscale KNN
			genes		List of genes to use for manifold learning

		Returns:
			knn		The multiscale knn graph as a sparse matrix, with k = 100
			mknn	Mutual knn subgraph, with k = 20
			pos		2D projection (t-SNE or gt-SNE) as ndarray with shape (n_cells, 2)
		"""
		n_valid = np.sum(ds.col_attrs["_Valid"] == 1)
		n_total = ds.shape[1]
		logging.info("%d of %d cells were valid", n_valid, n_total)
		logging.info("%d of %d genes were valid", np.sum(ds.row_attrs["_Valid"] == 1), ds.shape[0])
		cells = np.where(ds.col_attrs["_Valid"] == 1)[0]

		logging.info("Normalization")
		normalizer = cg.Normalizer(False)
		normalizer.fit(ds)

		if self.filter_cellcycle is not None:
			cell_cycle_genes = np.array(open(self.filter_cellcycle).read().split())
			mask = np.in1d(ds.ra.Gene, cell_cycle_genes)
			if np.sum(mask) == 0:
				logging.warn("None cell cycle genes where filtered, check your gene list")
		else:
			mask = None

		if self.genes is None:
			logging.info("Selecting up to %d genes", self.n_genes)
			genes = cg.FeatureSelection(self.n_genes).fit(ds, mu=normalizer.mu, sd=normalizer.sd, mask=mask)
			temp = np.zeros(ds.shape[0])
			temp[genes] = 1
			ds.set_attr("_Selected", temp, axis=0)
			logging.info("%d genes selected", temp.sum())

			n_components = min(50, n_valid)
			logging.info("PCA projection to %d components", n_components)
			pca = cg.PCAProjection(genes, max_n_components=n_components, layer=self.layer)
			pca_transformed = pca.fit_transform(ds, normalizer, cells=cells)
			transformed = pca_transformed

			logging.info("Generating KNN graph")
			k = min(10, n_valid - 1)
			nn = NearestNeighbors(n_neighbors=k, algorithm="ball_tree", n_jobs=4)
			nn.fit(transformed)
			knn = nn.kneighbors_graph(mode='connectivity')
			knn = knn.tocoo()
			mknn = knn.minimum(knn.transpose()).tocoo()

			logging.info("Louvain-Jaccard clustering")
			lj = cg.LouvainJaccard(resolution=1)
			labels = lj.fit_predict(knn)

			# Make labels for excluded cells == -1
			labels_all = np.zeros(ds.shape[1], dtype='int') + -1
			labels_all[cells] = labels
			ds.set_attr("Clusters", labels_all, axis=1)
			n_labels = np.max(labels) + 1
			logging.info("Found " + str(n_labels) + " LJ clusters")

			logging.info("Marker selection")
			(genes, _, _) = cg.MarkerSelection(n_markers=int(500 / n_labels), mask=mask).fit(ds)
		else:
			genes = self.genes

		temp = np.zeros(ds.shape[0])
		temp[genes] = 1
		ds.set_attr("_Selected", temp, axis=0)
		logging.info("%d genes selected", temp.sum())

		n_components = min(50, n_valid)
		logging.info("PCA projection to %d components", n_components)
		pca = cg.PCAProjection(genes, max_n_components=n_components, layer=self.layer)
		pca_transformed = pca.fit_transform(ds, normalizer, cells=cells)
		transformed = pca_transformed

		logging.info("Generating KNN graph")
		k = min(10, n_valid - 1)
		nn = NearestNeighbors(n_neighbors=k, algorithm="ball_tree", n_jobs=4)
		nn.fit(transformed)
		knn = nn.kneighbors_graph(mode='connectivity')
		knn = knn.tocoo()
		mknn = knn.minimum(knn.transpose()).tocoo()

		logging.info("Louvain-Jaccard clustering")
		lj = cg.LouvainJaccard(resolution=1)
		labels = lj.fit_predict(knn)

		# Make labels for excluded cells == -1
		labels_all = np.zeros(ds.shape[1], dtype='int') + -1
		labels_all[cells] = labels
		ds.set_attr("Clusters", labels_all, axis=1)
		n_labels = np.max(labels) + 1
		logging.info("Found " + str(n_labels) + " LJ clusters")

		logging.info("Marker selection")
		(genes, _, _) = cg.MarkerSelection(n_markers=int(500 / n_labels)).fit(ds)

		# Select cells across clusters more uniformly, preventing a single cluster from dominating the PCA
		cells_adjusted = cg.cap_select(labels, cells, int(n_valid * 0.2))
		n_components = min(50, cells_adjusted.shape[0])
		logging.info("PCA projection to %d components", n_components)
		pca = cg.PCAProjection(genes, max_n_components=n_components)
		pca.fit(ds, normalizer, cells=cells_adjusted)
		# Note that here we're transforming all cells; we just did the fit on the selection
		transformed = pca.transform(ds, normalizer, cells=cells)

		k = min(100, n_valid - 1)
		logging.info("Generating multiscale KNN graph (k = %d)", k)
		nn = NearestNeighbors(n_neighbors=k, algorithm="ball_tree", n_jobs=4)
		nn.fit(transformed)
		knn = nn.kneighbors(return_distance=False)  # shape: (n_cells, k)
		n_cells = knn.shape[0]
		a = np.tile(np.arange(n_cells), k)
		b = np.reshape(knn.T, (n_cells * k,))
		w = np.repeat(1 / np.power(np.arange(1, k + 1), self.alpha), n_cells)
		knn = sparse.coo_matrix((w, (a, b)), shape=(n_cells, n_cells))
		threshold = w > 0.05
		mknn = sparse.coo_matrix((w[threshold], (a[threshold], b[threshold])), shape=(n_cells, n_cells))
		mknn = mknn.minimum(mknn.transpose()).tocoo()

		perplexity = min(k, (n_valid - 1) / 3 - 1)
		if self.gtsne:
			logging.info("gt-SNE layout")
			# Note that perplexity argument is ignored in this case, but must still be given
			# because bhtsne will check that it has a valid value
			tsne_pos = cg.TSNE(perplexity=perplexity).layout(transformed, knn=knn.tocsr())
		else:
			logging.info("t-SNE layout")
			tsne_pos = cg.TSNE(perplexity=perplexity).layout(transformed)
		tsne_all = np.zeros((ds.shape[1], 2), dtype='int') + np.min(tsne_pos, axis=0)
		tsne_all[cells] = tsne_pos

		# Transform back to the full set of cells
		knn = sparse.coo_matrix((knn.data, (cells[knn.row], cells[knn.col])), shape=(n_total, n_total))
		mknn = sparse.coo_matrix((mknn.data, (cells[mknn.row], cells[mknn.col])), shape=(n_total, n_total))

		return (knn, mknn, tsne_all)
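The a/b/w block near the end of Example #2 encodes a rank-weighted ("multiscale") KNN graph: each edge gets weight 1/rank**alpha, low-weight edges are thresholded away, and only mutual edges are kept. A self-contained illustration of the same idiom on synthetic data (the sizes and variable values here are made up):

import numpy as np
from scipy import sparse
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 5))          # 200 "cells" in a 5-dimensional embedding
k, alpha = 10, 1.0

nn = NearestNeighbors(n_neighbors=k).fit(X)
idx = nn.kneighbors(return_distance=False)         # shape (n_cells, k), self excluded
n_cells = idx.shape[0]

a = np.tile(np.arange(n_cells), k)                  # source cell of each edge
b = np.reshape(idx.T, (n_cells * k,))               # targets: 1st neighbours of all cells, then 2nd, ...
w = np.repeat(1 / np.power(np.arange(1, k + 1), alpha), n_cells)  # weight = 1 / rank**alpha

knn = sparse.coo_matrix((w, (a, b)), shape=(n_cells, n_cells))
keep = w > 0.05                                     # drop edges to distant (high-rank) neighbours
mknn = sparse.coo_matrix((w[keep], (a[keep], b[keep])), shape=(n_cells, n_cells))
mknn = mknn.minimum(mknn.transpose()).tocoo()       # keep only mutual edges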
Example #3
0
def compute_tsne(ds, tsne_input, perplexity=100, namespace="", seed=0):
    np.random.seed(seed)
    ds.ca[f"{namespace}TSNE"] = cg.TSNE(
        perplexity=perplexity).layout(tsne_input)
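A possible call site for compute_tsne above, assuming an open loom file and a column attribute holding the t-SNE input; the file name and the PCA attribute are placeholders, not from the original code:

with loompy.connect("example.loom") as ds:       # hypothetical loom file
    tsne_input = ds.ca.PCA                       # hypothetical column attribute with PCA coordinates
    compute_tsne(ds, tsne_input, perplexity=30, namespace="PCA_", seed=0)
    # The layout is stored under ds.ca["PCA_TSNE"]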
Example #4
0
	def fit(self, ds: loompy.LoomConnection, initial_pos: np.ndarray = None, nng: np.ndarray = None, blocked_genes: np.ndarray = None) -> Tuple[sparse.coo_matrix, sparse.coo_matrix, np.ndarray]:
		"""
		Discover the manifold
		Args:
			n_genes			Number of genes to use for manifold learning (ignored if genes is not None)
			gtsne			Use graph t-SNE for layout (default: standard t-SNE)
			alpha			The scale parameter for multiscale KNN
			genes			List of genes to use for manifold learning
			initial_pos		Use this initial layout, shape (ds.shape[1], 2)
			nng				Non-neuronal genes, set these to zero in neurons (mask array)
			blocked_genes	Don't use these genes (mask array)

		Returns:
			knn		The multiscale knn graph as a sparse matrix, with k = 100
			mknn	Mutual knn subgraph, with k = 20
			pos		2D projection (t-SNE or gt-SNE) as ndarray with shape (n_cells, 2)
		"""
		n_cells = ds.shape[1]
		logging.info("Processing all %d cells", n_cells)
		logging.info("%d of %d genes were valid", np.sum(ds.row_attrs["_Valid"] == 1), ds.shape[0])

		logging.info("Normalization")
		normalizer = cg.Normalizer(False)
		normalizer.fit(ds)

		if self.filter_cellcycle is not None:
			cell_cycle_genes = np.array(open(self.filter_cellcycle).read().split())
			mask = np.in1d(ds.ra.Gene, cell_cycle_genes)
			if np.sum(mask) == 0:
				logging.warn("None cell cycle genes where filtered, check your gene list")
		else:
			mask = None
		
		if blocked_genes is not None:
			if mask is None:
				mask = blocked_genes
			else:
				mask = mask & blocked_genes

		if self.genes is None:
			logging.info("Selecting up to %d genes", self.n_genes)
			genes = cg.FeatureSelection(self.n_genes).fit(ds, mu=normalizer.mu, sd=normalizer.sd, mask=mask)

			n_components = min(50, n_cells)
			logging.info("PCA projection to %d components", n_components)
			pca = cg.PCAProjection(genes, max_n_components=n_components)
			pca_transformed = pca.fit_transform(ds, normalizer)
			transformed = pca_transformed

			logging.info("Generating balanced KNN graph")
			np.random.seed(0)
			k = min(self.k, n_cells - 1)
			bnn = cg.BalancedKNN(k=k, maxl=2 * k, sight_k=2 * k)
			bnn.fit(transformed)
			knn = bnn.kneighbors_graph(mode='connectivity')
			knn = knn.tocoo()
			mknn = knn.minimum(knn.transpose()).tocoo()

			logging.info("MKNN-Louvain clustering with outliers")
			(a, b, w) = (mknn.row, mknn.col, mknn.data)
			random.seed(13)
			lj = cg.LouvainJaccard(resolution=1, jaccard=False)
			labels = lj.fit_predict(knn)
			bigs = np.where(np.bincount(labels) >= 10)[0]
			mapping = {k: v for v, k in enumerate(bigs)}
			labels = np.array([mapping[x] if x in bigs else -1 for x in labels])

			n_labels = np.max(labels) + 1
			logging.info("Found " + str(n_labels) + " clusters")

			logging.info("Marker selection")
			temp = None
			if "Clusters" in ds.ca:
				temp = ds.ca.Clusters
			ds.ca.Clusters = labels - labels.min()
			(genes, _, _) = cg.MarkerSelection(n_markers=int(500 / n_labels), mask=mask, findq=False).fit(ds)
			if temp is not None:
				ds.ca.Clusters = temp
		else:
			genes = self.genes

		temp = np.zeros(ds.shape[0], dtype='bool')
		temp[genes] = True
		ds.ra._Selected = temp.astype('int')
		logging.info("%d genes selected", temp.sum())

		if self.genes is None:
			# Select cells across clusters more uniformly, preventing a single cluster from dominating the PCA
			cells_adjusted = cg.cap_select(labels - labels.min(), np.arange(n_cells), int(n_cells * 0.2))
			n_components = min(50, cells_adjusted.shape[0])
			logging.info("PCA projection to %d components", n_components)
			pca = cg.PCAProjection(genes, max_n_components=n_components)
			pca.fit(ds, normalizer, cells=cells_adjusted)
		else:
			n_components = min(50, n_cells)
			logging.info("PCA projection to %d components", n_components)
			pca = cg.PCAProjection(genes, max_n_components=n_components)
			pca.fit(ds, normalizer)
			
		# Note that here we're transforming all cells; we just did the fit on the selection
		transformed = pca.transform(ds, normalizer)

		k = min(self.k, n_cells - 1)
		logging.info("Generating multiscale KNN graph (k = %d)", k)
		bnn = cg.BalancedKNN(k=k, maxl=2 * k, sight_k=2 * k)
		bnn.fit(transformed)
		knn = bnn.kneighbors(mode='connectivity')[1][:, 1:]
		n_cells = knn.shape[0]
		a = np.tile(np.arange(n_cells), k)
		b = np.reshape(knn.T, (n_cells * k,))
		w = np.repeat(1 / np.power(np.arange(1, k + 1), self.alpha), n_cells)
		knn = sparse.coo_matrix((w, (a, b)), shape=(n_cells, n_cells))
		threshold = w > 0.025
		mknn = sparse.coo_matrix((w[threshold], (a[threshold], b[threshold])), shape=(n_cells, n_cells))
		mknn = mknn.minimum(mknn.transpose()).tocoo()

		perplexity = min(k, (n_cells - 1) / 3 - 1)
		if self.gtsne:
			logging.info("gt-SNE layout")
			# Note that perplexity argument is ignored in this case, but must still be given
			# because bhtsne will check that it has a valid value
			tsne_pos = cg.TSNE(perplexity=perplexity, max_iter=self.max_iter).layout(transformed, knn=knn.tocsr(), initial_pos=initial_pos)
		else:
			logging.info("t-SNE layout")
			tsne_pos = cg.TSNE(perplexity=perplexity, max_iter=self.max_iter).layout(transformed, initial_pos=initial_pos)

		return (knn, mknn, tsne_pos)
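Examples #2, #4, and #5 use cg.cap_select so that no single cluster dominates the second PCA fit. A minimal sketch of that kind of capped subsampling, under the assumption of a per-cluster cap (the real cg.cap_select may instead treat the third argument as an overall budget; the function below is illustrative only):

import numpy as np

def cap_select_sketch(labels: np.ndarray, cells: np.ndarray, max_n: int, seed: int = 0) -> np.ndarray:
    # Return a subset of `cells` in which no cluster contributes more than `max_n` cells.
    # `labels` gives the cluster of each entry in `cells` (same length).
    rng = np.random.RandomState(seed)
    keep = []
    for lbl in np.unique(labels):
        members = cells[labels == lbl]
        if members.shape[0] > max_n:
            members = rng.choice(members, size=max_n, replace=False)
        keep.append(members)
    return np.sort(np.concatenate(keep))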
Example #5
0
    def fit(
        self, ds: loompy.LoomConnection
    ) -> Tuple[sparse.coo_matrix, sparse.coo_matrix, np.ndarray]:
        """
		Discover the manifold
		Args:
			n_genes		Number of genes to use for manifold learning (ignored if genes is not None)
			gtsnse		Use graph t-SNE for layout (default: standard tSNE)
			alpha		The scale parameter for multiscale KNN
			genes		List of genes to use for manifold learning

		Returns:
			knn		The multiscale knn graph as a sparse matrix, with k = 100
			mknn	Mutual knn subgraph, with k = 20
			pos		2D projection (t-SNE or gt-SNE) as ndarray with shape (n_cells, 2)
		"""
        n_cells = ds.shape[1]
        logging.info("Processing all %d cells", n_cells)
        logging.info("%d of %d genes were valid",
                     np.sum(ds.row_attrs["_Valid"] == 1), ds.shape[0])

        logging.info("Normalization")
        normalizer = cg.Normalizer(False)
        normalizer.fit(ds)

        if self.filter_cellcycle is not None:
            cell_cycle_genes = np.array(
                open(self.filter_cellcycle).read().split())
            mask = np.in1d(ds.row_attrs["Gene"], cell_cycle_genes)
            if np.sum(mask) == 0:
                logging.warning(
                    "No cell cycle genes were filtered; check your gene list"
                )
        else:
            mask = None

        if self.genes is None:
            logging.info("Selecting up to %d genes", self.n_genes)
            genes = cg.FeatureSelection(self.n_genes).fit(ds,
                                                          mu=normalizer.mu,
                                                          sd=normalizer.sd,
                                                          mask=mask)

            n_components = min(50, n_cells)
            logging.info("PCA projection to %d components", n_components)
            pca = cg.PCAProjection(genes, max_n_components=n_components)
            pca_transformed = pca.fit_transform(ds, normalizer)
            transformed = pca_transformed

            logging.info("Generating balanced KNN graph")
            k = min(self.k, n_cells - 1)
            bnn = cg.BalancedKNN(k=k, maxl=2 * k)
            bnn.fit(transformed)
            knn = bnn.kneighbors_graph(mode='connectivity')
            knn = knn.tocoo()
            mknn = knn.minimum(knn.transpose()).tocoo()

            logging.info("MKNN-Louvain clustering with outliers")
            (a, b, w) = (mknn.row, mknn.col, mknn.data)
            G = igraph.Graph(list(zip(a, b)),
                             directed=False,
                             edge_attrs={'weight': w})
            VxCl = G.community_multilevel(return_levels=False,
                                          weights="weight")
            labels = np.array(VxCl.membership)
            bigs = np.where(np.bincount(labels) >= 10)[0]
            mapping = {k: v for v, k in enumerate(bigs)}
            labels = np.array(
                [mapping[x] if x in bigs else -1 for x in labels])

            # Make labels for excluded cells == -1
            ds.set_attr("Clusters", labels, axis=1)
            n_labels = np.max(labels) + 1
            logging.info("Found " + str(n_labels) + " clusters")

            logging.info("Marker selection")
            (genes, _,
             _) = cg.MarkerSelection(n_markers=int(500 / n_labels)).fit(ds)
        else:
            genes = self.genes

        temp = np.zeros(ds.shape[0])
        temp[genes] = 1
        ds.set_attr("_Selected", temp, axis=0)
        logging.info("%d genes selected", temp.sum())

        if self.genes is None:
            # Select cells across clusters more uniformly, preventing a single cluster from dominating the PCA
            cells_adjusted = cg.cap_select(labels - labels.min(),
                                           np.arange(n_cells),
                                           int(n_cells * 0.2))
            n_components = min(50, cells_adjusted.shape[0])
            logging.info("PCA projection to %d components", n_components)
            pca = cg.PCAProjection(genes, max_n_components=n_components)
            pca.fit(ds, normalizer, cells=cells_adjusted)
        else:
            n_components = min(50, n_cells)
            logging.info("PCA projection to %d components", n_components)
            pca = cg.PCAProjection(genes, max_n_components=n_components)
            pca.fit(ds, normalizer)

        # Note that here we're transforming all cells; we just did the fit on the selection
        transformed = pca.transform(ds, normalizer)

        k = min(self.k, n_cells - 1)
        logging.info("Generating multiscale KNN graph (k = %d)", k)
        bnn = cg.BalancedKNN(k=k, maxl=2 * k)
        bnn.fit(transformed)
        knn = bnn.kneighbors(mode='connectivity')[1][:, 1:]
        n_cells = knn.shape[0]
        a = np.tile(np.arange(n_cells), k)
        b = np.reshape(knn.T, (n_cells * k, ))
        w = np.repeat(1 / np.power(np.arange(1, k + 1), self.alpha), n_cells)
        knn = sparse.coo_matrix((w, (a, b)), shape=(n_cells, n_cells))
        threshold = w > 0.05
        mknn = sparse.coo_matrix((w[threshold], (a[threshold], b[threshold])),
                                 shape=(n_cells, n_cells))
        mknn = mknn.minimum(mknn.transpose()).tocoo()

        perplexity = min(k, (n_cells - 1) / 3 - 1)
        if self.gtsne:
            logging.info("gt-SNE layout")
            # Note that perplexity argument is ignored in this case, but must still be given
            # because bhtsne will check that it has a valid value
            tsne_pos = cg.TSNE(perplexity=perplexity).layout(transformed,
                                                             knn=knn.tocsr())
        else:
            logging.info("t-SNE layout")
            tsne_pos = cg.TSNE(perplexity=perplexity).layout(transformed)

        return (knn, mknn, tsne_pos)
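Examples #4 and #5 relabel clusters so that clusters with fewer than 10 cells become outliers (-1) and the surviving clusters are renumbered consecutively. The same idiom in isolation, on toy labels (the size threshold here is 3 just to keep the example small):

import numpy as np

labels = np.array([0, 0, 0, 1, 2, 2, 2, 2, 3])        # toy cluster labels
min_size = 3                                          # the pipelines above use 10

bigs = np.where(np.bincount(labels) >= min_size)[0]   # clusters large enough to keep
mapping = {old: new for new, old in enumerate(bigs)}  # renumber survivors consecutively
labels = np.array([mapping[x] if x in bigs else -1 for x in labels])
# -> array([ 0,  0,  0, -1,  1,  1,  1,  1, -1])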
Example #6
0
    def run(self) -> None:
        logging = cg.logging(self)
        with self.output().temporary_path() as out_file:
            logging.info("Aggregating loom file")
            ds = loompy.connect(self.input().fn)
            spec = {
                "Age": "tally",
                "Clusters": "first",
                "Class": "mode",
                "_Total": "mean",
                "Sex": "tally",
                "Tissue": "tally",
                "SampleID": "tally",
                "TissuePool": "first",
                "Outliers": "mean",
                "Bucket": "mode",
                "Region": "first",
                "OriginalClusters": "first",
                "LeafOrder": "first",
                "Probable_location": "first",
                "Developmental_compartment": "first",
                "Description": "first",
                "Location_based_on": "first",
                "Neurotransmitter": "first",
                "LeafOrder": "first",
                "Comment": "first",
                "ClusterName": "first",
                "TaxonomyRank1": "first",
                "TaxonomyRank2": "first",
                "TaxonomyRank3": "first",
                "TaxonomyRank4": "first",
                "TaxonomySymbol": "first"
            }
            cg.Aggregator(f=[0.2, 0.05]).aggregate(ds, out_file, agg_spec=spec)

            with loompy.connect(out_file) as dsagg:
                logging.info(
                    "Finding non-neuronal, housekeeping, and troublemaking genes"
                )
                (nng, blocked) = _gene_selection_L5(dsagg)

                logging.info("Manifold learning on the aggregate file")
                normalizer = cg.Normalizer(False)
                normalizer.fit(dsagg)
                pca = cg.PCAProjection(np.arange(dsagg.shape[1] * 10),
                                       max_n_components=50)
                pca.fit(dsagg, normalizer)
                transformed = pca.transform(dsagg, normalizer)
                k = 40
                bnn = cg.BalancedKNN(k=k, maxl=2 * k)
                bnn.fit(transformed)
                knn = bnn.kneighbors(mode='connectivity')[1][:, 1:]
                n_cells = knn.shape[0]
                a = np.tile(np.arange(n_cells), k)
                b = np.reshape(knn.T, (n_cells * k, ))
                w = np.repeat(1 / np.power(np.arange(1, k + 1), 1.8), n_cells)
                knn = sparse.coo_matrix((w, (a, b)), shape=(n_cells, n_cells))
                threshold = w > 0.025
                mknn = sparse.coo_matrix(
                    (w[threshold], (a[threshold], b[threshold])),
                    shape=(n_cells, n_cells))
                mknn = mknn.minimum(mknn.transpose()).tocoo()
                tsne = cg.TSNE(perplexity=5).layout(transformed)
                dsagg.col_graphs.KNN = knn
                dsagg.col_graphs.MKNN = mknn
                dsagg.ca._X = tsne[:, 0]
                dsagg.ca._Y = tsne[:, 1]

                logging.info("Manifold learning on all cells")
                init = np.zeros((ds.shape[1], 2))
                for lbl in np.unique(ds.ca.Clusters):
                    init[ds.ca.Clusters ==
                         lbl, :] = tsne[lbl, :] + np.random.normal(size=(
                             (ds.ca.Clusters == lbl).sum(), 2))
                ml = cg.ManifoldLearning2(gtsne=True, alpha=1, max_iter=3000)
                (knn, mknn, tsne) = ml.fit(ds,
                                           initial_pos=init,
                                           nng=nng,
                                           blocked_genes=blocked)
                ds.col_graphs.KNN = knn
                ds.col_graphs.MKNN = mknn
                ds.ca._X = tsne[:, 0]
                ds.ca._Y = tsne[:, 1]

                logging.info("Computing auto-annotation")
                aa = cg.AutoAnnotator(root="../auto-annotation/Adolescent/")
                aa.annotate_loom(dsagg)
                aa.save_in_loom(dsagg)

                logging.info("Computing auto-auto-annotation")
                n_clusters = dsagg.shape[1]
                (selected, selectivity, specificity,
                 robustness) = cg.AutoAutoAnnotator(n_genes=6).fit(dsagg)
                dsagg.set_attr("MarkerGenes",
                               np.array([
                                   " ".join(ds.ra.Gene[selected[:, ix]])
                                   for ix in np.arange(n_clusters)
                               ]),
                               axis=1)
                np.set_printoptions(precision=1, suppress=True)
                dsagg.set_attr("MarkerSelectivity",
                               np.array([
                                   str(selectivity[:, ix])
                                   for ix in np.arange(n_clusters)
                               ]),
                               axis=1)
                dsagg.set_attr("MarkerSpecificity",
                               np.array([
                                   str(specificity[:, ix])
                                   for ix in np.arange(n_clusters)
                               ]),
                               axis=1)
                dsagg.set_attr("MarkerRobustness",
                               np.array([
                                   str(robustness[:, ix])
                                   for ix in np.arange(n_clusters)
                               ]),
                               axis=1)
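In Example #6, the per-cell layout is initialised from the aggregate (per-cluster) t-SNE: each cell starts at its cluster's coordinate plus Gaussian jitter before ManifoldLearning2 refines it. The same idea in isolation, with made-up sizes standing in for the real data:

import numpy as np

n_cells, n_clusters = 1000, 5
clusters = np.random.randint(0, n_clusters, size=n_cells)        # toy per-cell cluster labels
cluster_pos = np.random.uniform(-50, 50, size=(n_clusters, 2))   # stand-in for the aggregate t-SNE

init = np.zeros((n_cells, 2))
for lbl in np.unique(clusters):
    members = clusters == lbl
    # Place each cell at its cluster's coordinate, jittered so points do not overlap exactly
    init[members, :] = cluster_pos[lbl, :] + np.random.normal(size=(members.sum(), 2))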