def add_clusters(df_cells, neighbor_dist=50):
    """Label spatial clusters of cells that share a barcode.

    Every cell is positioned at ``(GLOBAL_X + POSITION_J,
    GLOBAL_Y + POSITION_I)`` using the columns named by those module
    constants. Two cells are linked when they lie within `neighbor_dist`
    of each other and their ``BARCODE_0`` values compare equal; clusters
    are the connected components of the resulting graph.

    Args:
        df_cells: DataFrame containing the coordinate and barcode columns.
        neighbor_dist: Maximum distance between two cells for them to be
            linked.

    Returns:
        `df_cells` itself, with the ``CLUSTER`` column added in place.
        Cells that are linked to no other cell (clusters with only one
        cell) are assigned -1.
    """
    # Public import path; the `scipy.spatial.kdtree` submodule is deprecated.
    from scipy.spatial import KDTree
    import networkx as nx

    x = df_cells[GLOBAL_X] + df_cells[POSITION_J]
    y = df_cells[GLOBAL_Y] + df_cells[POSITION_I]
    barcodes = np.array(df_cells[BARCODE_0])

    num_cells = len(df_cells)
    print('searching for clusters among %d cells' % num_cells)

    # With fewer than two cells there is nothing to pair up, and building a
    # tree over an empty point set is not supported by every SciPy version.
    if num_cells > 1:
        neighbors = KDTree(np.array([x, y]).T).query_pairs(neighbor_dist)
    else:
        neighbors = ()

    # Force an integer (n_pairs, 2) array: without this, an empty result
    # becomes a float 1-D array that cannot be used for indexing below.
    pairs = np.array(list(neighbors), dtype=int).reshape(-1, 2)

    # Keep only the neighbouring pairs whose barcodes match.
    same_barcode = barcodes[pairs[:, 0]] == barcodes[pairs[:, 1]]

    G = nx.Graph()
    G.add_edges_from(pairs[same_barcode].tolist())

    # Cells that never appear in an edge keep the default label of -1.
    cluster_index = np.zeros(num_cells, dtype=int) - 1
    for i, members in enumerate(nx.connected_components(G)):
        cluster_index[list(members)] = i

    df_cells[CLUSTER] = cluster_index
    return df_cells
def _merge(self, roi):
    """Drop near-duplicate entries and return the four best that remain.

    Each entry of `roi` is indexed as ``entry[0]`` (an identifier, only
    used in debug output), ``entry[1]`` (a numeric score) and ``entry[2]``
    (a point usable by ``KDTree``).

    For every pair of entries whose points lie within a distance of 4, the
    entry with the lower score is discarded (on a tie, the second of the
    pair). A pair is ignored once either of its members has already been
    discarded, so the outcome can depend on the order in which
    ``query_pairs`` yields the pairs.

    Returns:
        A list of at most four surviving entries with the highest scores,
        sorted by the second coordinate of their point. An empty `roi`
        yields an empty list.
    """
    # KDTree cannot be built from an empty point list.
    if len(roi) == 0:
        return []

    points = [e[2] for e in roi]
    tree = KDTree(points)
    pp = tree.query_pairs(4)

    skips = set()
    for i, j in pp:
        if self.Debug:
            # Single pre-formatted argument: prints identically under
            # Python 2 and Python 3.
            print('%s(%.2f),%s(%.2f)' % (roi[i][0], roi[i][1],
                                         roi[j][0], roi[j][1]))
        if i in skips or j in skips:
            continue
        skip = i if roi[i][1] < roi[j][1] else j
        skips.add(skip)

    rs = [e for i, e in enumerate(roi) if i not in skips]
    # Ascending sort by score, so the last four are the highest scoring.
    rs = sorted(rs, key=lambda e: e[1])[-4:]
    return sorted(rs, key=lambda e: e[2][1])
def _merge(self, roi):
    """Suppress overlapping entries and keep the four highest scoring.

    `roi` is a sequence of entries read as ``entry[0]`` (identifier shown
    in debug output), ``entry[1]`` (numeric score) and ``entry[2]`` (point
    passed to ``KDTree``).

    Entries whose points are within a distance of 4 of each other are
    treated as duplicates: the lower-scoring one is dropped, and on equal
    scores the second of the pair is dropped. Pairs involving an entry
    that was already dropped are skipped, which makes the result sensitive
    to the iteration order of the pair set.

    Returns:
        Up to four remaining entries with the largest scores, ordered by
        ``entry[2][1]``. Returns ``[]`` when `roi` is empty.
    """
    if len(roi) == 0:
        # Nothing to merge, and KDTree rejects an empty point list.
        return []

    tree = KDTree([entry[2] for entry in roi])
    close_pairs = tree.query_pairs(4)

    dropped = set()
    for i, j in close_pairs:
        if self.Debug:
            # Parenthesised single string works as both the Python 2
            # print statement and the Python 3 print function.
            print('%s(%.2f),%s(%.2f)' % (roi[i][0], roi[i][1],
                                         roi[j][0], roi[j][1]))
        if i in dropped or j in dropped:
            continue
        dropped.add(i if roi[i][1] < roi[j][1] else j)

    survivors = [entry for idx, entry in enumerate(roi)
                 if idx not in dropped]
    # Sorted ascending by score; the tail holds the best four.
    best = sorted(survivors, key=lambda e: e[1])[-4:]
    return sorted(best, key=lambda e: e[2][1])
def add_clusters(df_cells, barcode_col=BARCODE_0, radius=50,
                 verbose=True, ij=(POSITION_I, POSITION_J)):
    """Label spatial clusters of objects that share a barcode.

    Every row is positioned at ``(GLOBAL_X + J, GLOBAL_Y + I)`` where
    ``I, J = ij`` are column names. Two rows are linked when they lie
    within `radius` of each other and their `barcode_col` values compare
    equal; clusters are the connected components of the resulting graph.

    Args:
        df_cells: DataFrame containing the coordinate and barcode columns.
        barcode_col: Column whose values must match for rows to be linked.
        radius: Maximum distance between two rows for them to be linked.
        verbose: If true, print a progress message before searching.
        ij: Pair of column names ``(I, J)`` added to ``GLOBAL_Y`` and
            ``GLOBAL_X`` respectively.

    Returns:
        A copy of `df_cells` with two extra columns: ``CLUSTER`` (the
        cluster label, or -1 for rows linked to no other row, i.e.
        clusters with only one cell) and ``CLUSTER_SIZE`` (the number of
        rows in the cluster, 1 for unclustered rows).
    """
    # Public import path; the `scipy.spatial.kdtree` submodule is deprecated.
    from scipy.spatial import KDTree
    import networkx as nx

    I, J = ij
    x = df_cells[GLOBAL_X] + df_cells[J]
    y = df_cells[GLOBAL_Y] + df_cells[I]
    barcodes = np.array(df_cells[barcode_col])

    num_cells = len(df_cells)
    if verbose:
        message = 'searching for clusters among {} {} objects'
        print(message.format(num_cells, barcode_col))

    # With fewer than two rows there is nothing to pair up, and building a
    # tree over an empty point set is not supported by every SciPy version.
    if num_cells > 1:
        neighbors = KDTree(np.array([x, y]).T).query_pairs(radius)
    else:
        neighbors = ()

    # Force an integer (n_pairs, 2) array: without this, an empty result
    # becomes a float 1-D array that cannot be used for indexing below.
    pairs = np.array(list(neighbors), dtype=int).reshape(-1, 2)

    # Keep only the neighbouring pairs whose barcodes match.
    same_barcode = barcodes[pairs[:, 0]] == barcodes[pairs[:, 1]]

    G = nx.Graph()
    G.add_edges_from(pairs[same_barcode].tolist())

    # Rows that never appear in an edge keep the default label of -1.
    cluster_index = np.zeros(num_cells, dtype=int) - 1
    for i, members in enumerate(nx.connected_components(G)):
        cluster_index[list(members)] = i

    # Work on a copy so the caller's DataFrame is left untouched.
    df_cells = df_cells.copy()
    df_cells[CLUSTER] = cluster_index
    df_cells[CLUSTER_SIZE] = (
        df_cells.groupby(CLUSTER)[barcode_col].transform('size'))
    # All unclustered rows share the label -1, so the groupby above counts
    # them together; each one is really a cluster of size 1.
    df_cells.loc[df_cells[CLUSTER] == -1, CLUSTER_SIZE] = 1
    return df_cells