def solve_subproblem(block_id):
    """Solve the sub-problem of a single block and return its cut edge ids.

    The function reads ``block_prefix``, ``node_storage_prefix``, ``costs``
    and ``agglomerator`` from the enclosing scope.

    Args:
        block_id: id of the block; ``str(block_id)`` is appended to
            ``block_prefix`` to locate the serialized nodes of the block.

    Returns:
        The ids of the inner edges cut by ``agglomerator`` followed by all
        outer edges of the block. If the block has at most one node, or no
        inner edges at all, only the outer edges are returned.
    """
    # load the nodes in this sub-block
    block_path = block_prefix + str(block_id)
    assert os.path.exists(block_path), block_path
    nodes = ndist.loadNodes(block_path)

    # TODO we extract the graph locally with ndist.
    # The issue with this is that the node-to-block list is stored as n5,
    # which could wreck the file system ...
    # If this storage is changed to hdf5, everything should be fine.
    inner_edges, outer_edges, sub_uvs = ndist.extractSubgraphFromNodes(
        nodes, node_storage_prefix, block_prefix
    )

    # we might only have a single node, but we still need the outer edges
    if len(nodes) <= 1:
        return outer_edges

    assert len(sub_uvs) == len(inner_edges)

    # Several nodes that share no edge inside the block: there is nothing to
    # solve, and `sub_uvs.max()` below would fail on the empty array.
    # The result of the remaining code would be exactly the outer edges.
    if len(inner_edges) == 0:
        return outer_edges

    # build the local graph; sub_uvs presumably holds block-local node ids
    # (the node count is derived from its maximum) -- TODO confirm
    n_local_nodes = int(sub_uvs.max() + 1)
    sub_graph = nifty.graph.undirectedGraph(n_local_nodes)
    sub_graph.insertEdges(sub_uvs)

    sub_costs = costs[inner_edges]
    sub_result = agglomerator(sub_graph, sub_costs)

    # an inner edge is cut if its two end points received different labels
    sub_edgeresult = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
    assert len(sub_edgeresult) == len(inner_edges)

    cut_edge_ids = inner_edges[sub_edgeresult]
    return np.concatenate([cut_edge_ids, outer_edges])
def debug_subresult(block_id=1):
    """Visualize the stored sub-problem result of one block (isbi example).

    Every graph edge that is neither listed in the stored result of the block
    nor an outer edge of the block is merged with a union-find; the resulting
    node labeling is projected to pixels and shown next to the watersheds.

    Args:
        block_id: id of the block whose result is inspected.
    """
    example_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    sub_graph_prefix = os.path.join(example_path, 's0', 'sub_graphs', 'block_')

    graph = ndist.Graph(os.path.join(example_path, 'graph'))
    nodes = ndist.loadNodes(sub_graph_prefix + str(block_id))
    # only the outer edges of the block are needed here
    _, outer_edges, _ = graph.extractSubgraphFromNodes(nodes)

    # presumably the cut edge ids written by the sub-problem solver -- verify
    res = np.load('./tmp/subproblem_results/s0_block%i.npy' % block_id)

    # start with "merge everything", then exclude stored and outer edges
    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    for excluded in (res, outer_edges):
        merge_edges[excluded] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    sets = nifty.ufd.ufd(n_nodes)
    sets.merge(uv_ids[merge_edges])
    node_labels = sets.elementLabeling()

    # map the node labeling back to the watershed volume and display both
    ws = z5py.File(example_path)['volumes/watersheds'][:]
    rag = nrag.gridRag(ws, numberOfLabels=n_nodes)
    seg = nrag.projectScalarNodeDataToPixels(rag, node_labels)
    view([ws, seg])
def debug_subresult(block_id=1):
    """Visualize the stored sub-problem result of one block (platyneris data).

    Every graph edge that is neither listed in the stored result of the block
    nor an outer edge of the block is merged with a union-find; the resulting
    node labeling is mapped onto the watershed volume and both are displayed.

    Args:
        block_id: id of the block whose result is inspected.
    """
    from cremi_tools.viewer.volumina import view

    path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/segmentation/val_block_01.n5'
    tmp_folder = './tmp_plat_val'
    sub_graph_prefix = os.path.join(path, 's0', 'sub_graphs', 'block_')

    graph = ndist.Graph(os.path.join(path, 'graph'))
    # the first loaded node is dropped unconditionally
    # (presumably the ignore label 0 -- TODO confirm)
    nodes = ndist.loadNodes(sub_graph_prefix + str(block_id))[1:]
    # only the outer edges of the block are needed here
    _, outer_edges, _ = graph.extractSubgraphFromNodes(nodes)

    result_file = os.path.join(
        tmp_folder, 'subproblem_results/s0_block%i.npy' % block_id
    )
    # presumably the cut edge ids written by the sub-problem solver -- verify
    res = np.load(result_file)

    # start with "merge everything", then exclude stored and outer edges
    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    for excluded in (res, outer_edges):
        merge_edges[excluded] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    sets = nufd.ufd(n_nodes)
    sets.merge(uv_ids[merge_edges])
    node_labels = sets.elementLabeling()

    # look up the merged label of every watershed pixel and display both
    ws = z5py.File(path)['volumes/watershed'][:]
    seg = nt.take(node_labels, ws)
    view([ws, seg])
def solve_block_subproblem(block_id, graph, block_prefix, costs, agglomerator,
                           shape, block_shape, cut_outer_edges):
    """Solve the sub-problem of one block on its local sub-graph.

    Args:
        block_id: id of the block; ``str(block_id)`` is appended to
            ``block_prefix`` to locate the serialized nodes of the block.
        graph: graph object used to extract the sub-graph of the block.
        block_prefix: path prefix of the serialized block nodes.
        costs: edge costs, indexed with the inner edge ids of the block.
        agglomerator: callable ``(graph, costs) -> node labeling``.
        shape: not used by the current implementation.
        block_shape: not used by the current implementation.
        cut_outer_edges: if true, the outer edges of the block are appended
            to the returned cut edges.

    Returns:
        The ids of the cut edges. For a block with exactly one node this is
        ``outer_edges`` if ``cut_outer_edges`` is set and ``None`` otherwise.
    """
    node_file = block_prefix + str(block_id)
    assert os.path.exists(node_file), node_file
    nodes = ndist.loadNodes(node_file)

    # NOTE: an earlier, now disabled, variant extracted the sub-graph from
    # disk via ndist and skipped the ignore label (== 0) first, because that
    # label spans many blocks and slowed the block-wise extraction down
    # enormously. The in-memory graph is used instead.
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    # a single node has no inner edges, so only the outer edges can be cut
    if len(nodes) == 1:
        if cut_outer_edges:
            return outer_edges
        return None

    assert len(sub_uvs) == len(inner_edges)
    assert len(sub_uvs) > 0, str(block_id)

    # build the local graph from the sub-graph uv-ids
    local_graph = undirectedGraph(int(sub_uvs.max() + 1))
    local_graph.insertEdges(sub_uvs)

    local_costs = costs[inner_edges]
    assert len(local_costs) == local_graph.numberOfEdges

    node_labels = agglomerator(local_graph, local_costs)

    # an inner edge is cut if its end points ended up with different labels
    is_cut = node_labels[sub_uvs[:, 0]] != node_labels[sub_uvs[:, 1]]
    assert len(is_cut) == len(inner_edges)

    cut_edge_ids = inner_edges[is_cut]
    if not cut_outer_edges:
        return cut_edge_ids
    return np.concatenate([cut_edge_ids, outer_edges])
def check_block(block_id, blocking, ds, graph_block_prefix):
    """Check that the nodes stored for a block match the labels in the block.

    Args:
        block_id: id of the block to check.
        blocking: blocking object providing ``getBlock(block_id)``.
        ds: label dataset, indexed with the bounding box of the block.
        graph_block_prefix: path prefix of the serialized block nodes;
            ``str(block_id)`` is appended to it.

    Returns:
        ``block_id`` if the stored nodes differ from the unique labels found
        in the block, ``None`` if they agree.
    """
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    nodes_seg = np.unique(ds[bb])

    nodes = ndist.loadNodes(graph_block_prefix + str(block_id))

    if len(nodes_seg) != len(nodes):
        return block_id

    # Node ids must match exactly. A tolerance based comparison
    # (np.allclose) scales its tolerance with the magnitude of the values and
    # would accept large ids that differ by small amounts.
    if not np.array_equal(nodes, nodes_seg):
        return block_id
    return None
def _check_subresults(self):
    """Check the serialized sub-graph of every block against the watersheds.

    For each block the stored roi attributes, the serialized nodes and the
    serialized graph are compared with values recomputed from the watershed
    volume (``self.input_path`` / ``self.input_key``). The sub-graphs are
    read from ``self.output_path`` under ``s0/sub_graphs/block_<id>``.
    """
    f = z5py.File(self.input_path)
    f_out = z5py.File(self.output_path)
    ds_ws = f[self.input_key]

    shape = ds_ws.shape
    blocking = nt.blocking([0, 0, 0], list(shape), self.block_shape)

    halo = [1, 1, 1]
    for block_id in range(blocking.numberOfBlocks):
        # get the block with the appropriate halo
        # and the corresponding bounding box; it runs from the begin of the
        # inner block to the end of the outer block
        block = blocking.getBlockWithHalo(block_id, halo)
        outer_block, inner_block = block.outerBlock, block.innerBlock
        bb = tuple(
            slice(beg, end)
            for beg, end in zip(inner_block.begin, outer_block.end)
        )

        # check that the rois are correct
        block_key = os.path.join('s0', 'sub_graphs', 'block_%i' % block_id)
        block_attrs = f_out[block_key].attrs
        self.assertEqual(inner_block.begin, block_attrs['roiBegin'])
        self.assertEqual(outer_block.end, block_attrs['roiEnd'])

        # load the graph
        graph_path = os.path.join(self.output_path, block_key)
        graph = ndist.Graph(graph_path)
        nodes_deser = ndist.loadNodes(graph_path)

        # load the segmentation and check that the nodes are correct.
        # Ids are compared exactly: np.allclose would apply a relative
        # tolerance and silently broadcast arrays of different shape.
        seg = ds_ws[bb]
        nodes = graph.nodes()
        nodes_ws = np.unique(seg)
        self.assertTrue(np.array_equal(nodes_ws, nodes_deser))
        self.assertTrue(np.array_equal(nodes_ws, nodes))

        # compute the rag and check that the graph is correct
        rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)
        # number of nodes in nifty can be larger
        self.assertGreaterEqual(rag.numberOfNodes, graph.numberOfNodes)
        self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
        self.assertTrue(np.array_equal(rag.uvIds(), graph.uvIds()))
def read_subres(block_id):
    """Read the nodes and the stored sub-result of one block.

    ``blocking``, ``block_node_prefix`` and ``ds_results`` are taken from the
    enclosing scope.

    Args:
        block_id: id of the block to read.

    Returns:
        ``None`` if no sub-result is stored for the block, otherwise the
        tuple ``(nodes, subres, int(subres.max()) + 1)``.
    """
    block = blocking.getBlock(block_id)

    # nodes that belong to this block
    nodes = ndist.loadNodes(block_node_prefix + str(block_id))

    # the sub-result is stored in the chunk at the block's grid position
    chunk = tuple(
        start // extent
        for start, extent in zip(block.begin, blocking.blockShape)
    )
    subres = ds_results.read_chunk(chunk)

    if subres is not None:
        assert len(nodes) == len(subres), "block %i: %i, %i" % (block_id, len(nodes), len(subres))
        return nodes, subres, int(subres.max()) + 1

    # No stored result means the block contains the ignore label and has no
    # edges. This does not imply that the block ONLY holds the ignore label
    # (or a single ordinary node): several ordinary nodes can be separated by
    # the ignore label and therefore share no edge.
    assert 0 in nodes
    return None
def test_extraction(self):
    """Compare in-memory sub-graph extraction with extraction from disc.

    For the first 64 blocks the inner edges, outer edges and sub-graph
    uv-ids obtained from the complete in-memory graph must equal those
    obtained with ``ndist.extractSubgraphFromNodes``. Blocks with exactly
    one node are skipped.
    """
    # load complete graph
    full_graph = ndist.loadAsUndirectedGraph(self.path_to_graph)

    # TODO test for more block-ids
    for block_id in range(64):
        node_file = self.graph_block_prefix + str(block_id)
        assert os.path.exists(node_file), node_file
        nodes = ndist.loadNodes(node_file)
        if len(nodes) == 1:
            continue

        # sub-graph from the in-memory graph
        inner_mem, outer_mem, graph_mem = full_graph.extractSubgraphFromNodes(
            nodes
        )
        # sub-graph from disc
        inner_disc, outer_disc, uvs_disc = ndist.extractSubgraphFromNodes(
            nodes, self.path_to_nodes, self.graph_block_prefix
        )

        node_count_mem = graph_mem.numberOfNodes
        uvs_mem = graph_mem.uvIds()
        node_count_disc = int(uvs_disc.max() + 1)

        # the graphs must agree ...
        self.assertEqual(node_count_mem, node_count_disc)
        self.assertEqual(uvs_mem.shape, uvs_disc.shape)
        self.assertTrue((uvs_mem == uvs_disc).all())

        # ... and so must the edge ids
        self.assertEqual(inner_mem.shape, inner_disc.shape)
        self.assertTrue((inner_mem == inner_disc).all())
        self.assertEqual(outer_mem.shape, outer_disc.shape)
        self.assertTrue((outer_mem == outer_disc).all())
def _solve_block_problem(block_id, graph, uv_ids, block_prefix,
                         costs, agglomerator, ignore_label,
                         blocking, out, time_limit):
    """Solve the sub-problem of one block and serialize the result.

    Args:
        block_id: id of the block; ``str(block_id)`` is appended to
            ``block_prefix`` to locate the serialized nodes of the block.
        graph: graph object used to extract the edges of the block.
        uv_ids: uv-ids of the graph edges, indexed with edge ids.
        block_prefix: path prefix of the serialized block nodes.
        costs: edge costs, indexed with edge ids.
        agglomerator: callable ``(graph, costs, time_limit=...)`` returning
            one label per node of the local graph.
        ignore_label: if truthy, a leading node 0 is treated as the ignore
            label and excluded from the sub-problem.
        blocking: blocking object providing ``getBlock`` and ``blockShape``.
        out: container holding the ``'cut_edge_ids'`` and ``'node_result'``
            datasets the results are written to.
        time_limit: forwarded to ``agglomerator``.

    Returns:
        None. The cut edge ids and, if a sub-problem was solved, the node
        labeling are written to the chunk of the block.
    """
    fu.log("Start processing block %i" % block_id)

    node_file = block_prefix + str(block_id)
    assert os.path.exists(node_file), node_file
    nodes = ndist.loadNodes(node_file)

    # With an ignore label, drop node 0 from the sub-problem
    # (nodes are sorted, so it can only be at position 0).
    removed_ignore_label = bool(ignore_label and nodes[0] == 0)
    if removed_ignore_label:
        nodes = nodes[1:]
        # nothing but the ignore label in this block
        if len(nodes) == 0:
            fu.log_block_success(block_id)
            return

    # Invalid nodes are allowed here; they can occur for un-connected graphs
    # resulting from bad masks ...
    inner_edges, outer_edges = graph.extractSubgraphFromNodes(
        nodes, allowInvalidNodes=True
    )

    n_inner = len(inner_edges)
    if n_inner > 0:
        # solve the multicut for this block
        fu.log("Block %i: Solving sub-block with %i nodes and %i edges" % (block_id, len(nodes), n_inner))
        sub_uvs = uv_ids[inner_edges]

        # relabel the nodes and the uv-ids to consecutive local ids
        # for more efficient processing
        _, max_id, mapping = vigra.analysis.relabelConsecutive(
            nodes, start_label=0, keep_zeros=False
        )
        sub_uvs = nt.takeDict(mapping, sub_uvs)

        sub_graph = nifty.graph.undirectedGraph(max_id + 1)
        sub_graph.insertEdges(sub_uvs)

        sub_costs = costs[inner_edges]
        assert len(sub_costs) == sub_graph.numberOfEdges

        sub_result = agglomerator(sub_graph, sub_costs, time_limit=time_limit)
        assert len(sub_result) == len(nodes), "%i, %i" % (len(sub_result), len(nodes))

        # an inner edge is cut if its end points have different labels;
        # all outer edges are cut as well
        is_cut = sub_result[sub_uvs[:, 0]] != sub_result[sub_uvs[:, 1]]
        assert len(is_cut) == n_inner
        cut_edge_ids = np.concatenate([inner_edges[is_cut], outer_edges])

        # relabel the node result in place, starting at 1
        _, res_max_id, _ = vigra.analysis.relabelConsecutive(
            sub_result, start_label=1, keep_zeros=False, out=sub_result
        )
        fu.log("Block %i: Subresult has %i unique ids" % (block_id, res_max_id))

        # IMPORTANT: the ignore label may only be added back AFTER the edge
        # result was computed, because it shifts the node positions.
        if removed_ignore_label:
            sub_result = np.concatenate(
                (np.zeros(1, dtype='uint64'), sub_result)
            )
    else:
        # no inner edges: only the outer edges are cut and there is no node
        # result to store
        assert len(nodes) <= 1 or removed_ignore_label,\
            "Can only have trivial sub-graphs for more than one node if we removed ignore label"
        cut_edge_ids = outer_edges
        sub_result = None
        fu.log("Block %i: has no inner edges" % block_id)

    # chunk position of this block in the result datasets
    block = blocking.getBlock(block_id)
    chunk_id = tuple(
        start // extent
        for start, extent in zip(block.begin, blocking.blockShape)
    )

    # serialize the cut edge ids and the (local) node labeling;
    # NOTE(review): the trailing `True` is presumably the variable-length
    # chunk flag of write_chunk -- confirm against the dataset API
    ds_edge_res = out['cut_edge_ids']
    fu.log("Block %i: Serializing %i cut edges" % (block_id, len(cut_edge_ids)))
    ds_edge_res.write_chunk(chunk_id, cut_edge_ids, True)

    if sub_result is not None:
        ds_node_res = out['node_result']
        fu.log("Block %i: Serializing %i node results" % (block_id, len(sub_result)))
        ds_node_res.write_chunk(chunk_id, sub_result, True)

    fu.log_block_success(block_id)
def load_nodes(self, graph_path, graph_key):
    """Load the serialized nodes of a graph and verify their count.

    Args:
        graph_path: path of the container holding the graph.
        graph_key: key of the graph inside the container.

    Returns:
        The nodes loaded from ``os.path.join(graph_path, graph_key)``. The
        test fails if their number differs from the ``numberOfNodes``
        attribute stored with the graph.
    """
    expected_count = z5py.File(graph_path)[graph_key].attrs['numberOfNodes']
    nodes = ndist.loadNodes(os.path.join(graph_path, graph_key))
    self.assertEqual(expected_count, len(nodes))
    return nodes