def fc_graph(self, xy, transcriptome, savepath=None):
    """Build the fully-connected similarity graph and store it in ``self.A``.

    A pairwise distance matrix is accumulated from ``self._xy_dist`` (skipped
    when ``self.no_spatial`` is truthy) and ``self._transcriptome_dist``, then
    converted into an adjacency (similarity) matrix with a zero diagonal.

    Args:
        xy: Spatial features, one row per sample; stored on ``self.xy``.
        transcriptome: Genomic features, one row per sample; stored on
            ``self.transcriptome``. Must have as many rows as ``xy``.
        savepath: Forwarded unchanged to ``self._save`` together with the
            file name ``'fc-graph.npy'``.

    Returns:
        The ``(n, n)`` adjacency matrix, which is also kept as ``self.A``.

    Raises:
        AssertionError: If ``xy`` and ``transcriptome`` differ in row count.
    """
    self.xy = xy  # spatial features
    self.transcriptome = transcriptome  # genomic features
    assert self.xy.shape[0] == self.transcriptome.shape[0], \
        (self.xy.shape[0], self.transcriptome.shape[0])

    tic = util.tictoc('Building fully-connected graph...')
    n = self.xy.shape[0]
    distance = np.zeros((n, n))  # opposite of adjacency matrix

    # Add xy distance to the top-right triangle.
    if not self.no_spatial:
        util.debug('setting spatial distance...')
        for i in range(n):
            if i % 200 == 0:
                util.debug(i)
            for j in range(i + 1, n):
                distance[i, j] += self._xy_dist(self.xy[i, ...], self.xy[j, ...])

    # Add feature distance to the top-right triangle.
    util.debug('setting transcriptome distance...')
    for i in range(n):
        if i % 200 == 0:
            util.debug(i)
        for j in range(i + 1, n):
            distance[i, j] += self._transcriptome_dist(
                self.transcriptome[i, ...], self.transcriptome[j, ...])

    # Mirror into the lower-left triangle (the diagonal is still zero here).
    distance += distance.T

    # Create adjacency matrix. (Because k-nearest-neighbours is used later,
    # ranking is all that matters, not relative distance between all points.)
    # Distance 0 maps to 1 and the largest distance maps to 1 / (max + 1).
    max_distance = distance.max()
    self.A = (max_distance - distance + 1.) / (max_distance + 1.)

    # Remove self-loops (diagonal).
    np.fill_diagonal(self.A, 0.)

    util.tictoc('Built fully-connected graph.', tic)
    self._save(savepath, 'fc-graph.npy', self.A)
    return self.A
def random_walk_laplacian(self):
    """Return the random-walk Laplacian ``L_RW = D^{-1} L_U``, computing it once.

    The result is cached on ``self.Lrw``; later calls return the cached
    array. Degrees are the column sums of ``self.graph``. A node with zero
    degree produces non-finite entries (division by zero is not guarded).

    Returns:
        The random-walk Laplacian stored in ``self.Lrw``.
    """
    if self.Lrw is None:
        tic = util.tictoc('Making Random-walk Laplacian...')
        degree = np.sum(self.graph, axis=0)
        # 1.0 / degree (rather than degree ** -1) also works for integer
        # degree arrays, where NumPy rejects negative integer powers.
        inv_degree = 1.0 / degree
        # Left-multiplying by diag(v) scales row i by v[i]; broadcasting does
        # the same without building a dense n x n diagonal matrix.
        self.Lrw = inv_degree[:, np.newaxis] * self.unnormalized_laplacian()
        util.tictoc('Made Random-walk Laplacian.', tic)
    return self.Lrw
def normalized_laplacian(self):
    """Return the normalized Laplacian ``L_N = D^{-1/2} L_U D^{-1/2}``, computing it once.

    The result is cached on ``self.Ln``; later calls return the cached
    array. Degrees are the column sums of ``self.graph``. A node with zero
    degree produces non-finite entries (division by zero is not guarded).

    Returns:
        The normalized Laplacian stored in ``self.Ln``.
    """
    if self.Ln is None:
        tic = util.tictoc('Making Normalized Laplacian...')
        degree = np.sum(self.graph, axis=0)
        inv_sqrt_degree = degree ** (-1. / 2.)
        # diag(v) @ L @ diag(v) scales row i and column j by v[i] * v[j];
        # broadcasting avoids two dense n x n matrix products.
        self.Ln = (inv_sqrt_degree[:, np.newaxis]
                   * self.unnormalized_laplacian()
                   * inv_sqrt_degree[np.newaxis, :])
        util.tictoc('Made Normalized Laplacian.', tic)
    return self.Ln
def unnormalized_laplacian(self):
    """Return the unnormalized Laplacian ``L_U = D - A``, computing it once.

    ``D`` is the diagonal degree matrix built from the column sums of
    ``self.graph`` and ``A`` is ``self.graph`` itself. The result is cached
    on ``self.Lu``; later calls return the cached array.

    Returns:
        The unnormalized Laplacian stored in ``self.Lu``.
    """
    if self.Lu is None:
        tic = util.tictoc('Making Unnormalized Laplacian...')
        degree = np.sum(self.graph, axis=0)
        # (unnormalized Laplacian) = (degree matrix) - (adjacency matrix)
        self.Lu = np.diag(degree) - self.graph
        util.tictoc('Made Unnormalized Laplacian.', tic)
    return self.Lu
def sparsify(self, k, savepath=None):
    """Sparsify ``self.A`` in place of the dense graph and symmetrize it.

    For every row, only the columns listed by ``self._sparsified(k)`` are
    kept; all other entries are zeroed. The matrix is then averaged with its
    transpose so it is symmetric, which means an edge kept in only one
    direction ends up with half its original weight.

    Args:
        k: Forwarded to ``self._sparsified``.
            NOTE(review): presumably the number of neighbours kept per
            row - confirm against ``_sparsified``.
        savepath: Forwarded unchanged to ``self._save`` together with the
            file name ``'sparse-graph.npy'``.
    """
    tic = util.tictoc(
        'Sparsifying matrix A (%d nonzeros)...' % np.count_nonzero(self.A))

    # Binary mask with ones at the column indices to keep in each row.
    keep = self._sparsified(k)
    mask = np.zeros_like(self.A)
    for r, cols in enumerate(keep):
        mask[r, cols] = 1
    self.A *= mask

    # Make symmetric.
    self.A = (self.A + self.A.T) / 2.

    util.tictoc(
        'Sparsified matrix A (%d nonzeros).' % np.count_nonzero(self.A), tic)
    self._save(savepath, 'sparse-graph.npy', self.A)
def _cluster(self, embedding, k):
    """Fit k-means with ``k`` clusters on ``embedding`` and return the model.

    The fitted estimator is also handed to ``self._save`` under a name built
    from ``self.dim`` and ``k``.

    Args:
        embedding: Data passed to ``KMeans.fit``.
        k: Number of clusters.

    Returns:
        The fitted ``sklearn.cluster.KMeans`` estimator.
    """
    started = util.tictoc('clustering...')
    estimator = sklearn.cluster.KMeans(k)
    fitted = estimator.fit(embedding)
    util.tictoc('clustered.', started)

    label = 'kmeans-dim-%s-k-%d.pkl' % (str(self.dim), k)
    # NOTE(review): other call sites use self._save(savepath, filename, obj);
    # here only two arguments are passed - confirm against _save's signature.
    self._save(label, fitted)
    return fitted