import os

import numpy as np
import vigra
import z5py

import nifty
import nifty.tools
import nifty.ufd
import nifty.distributed as ndist

# NOTE 'vu' refers to the volume utility module of the surrounding workflow
# package (providing 'blocks_in_volume' and 'file_reader'); the exact import
# path depends on the package layout
from cluster_tools.utils import volume_utils as vu


def reduce_scalelevel(scale, n_nodes, uv_ids, costs, merge_edge_ids,
                      initial_node_labeling, shape, block_shape, new_block_shape,
                      cost_accumulation="sum"):
    n_edges = len(uv_ids)

    # merge node pairs with a union-find datastructure
    ufd = nifty.ufd.ufd(n_nodes)
    merge_pairs = uv_ids[merge_edge_ids]
    ufd.merge(merge_pairs)

    # get the node results and label them consecutively
    node_labeling = ufd.elementLabeling()
    node_labeling, max_new_id, _ = vigra.analysis.relabelConsecutive(node_labeling)
    n_new_nodes = max_new_id + 1

    # get the labeling of the initial nodes
    if initial_node_labeling is None:
        new_initial_node_labeling = node_labeling
    else:
        # should this ever become a bottleneck, we can parallelize it in nifty,
        # but for now that would be premature optimization
        new_initial_node_labeling = node_labeling[initial_node_labeling]

    # get the new edge costs
    edge_mapping = nifty.tools.EdgeMapping(uv_ids, node_labeling, numberOfThreads=8)
    new_uv_ids = edge_mapping.newUvIds()
    new_costs = edge_mapping.mapEdgeValues(costs, cost_accumulation, numberOfThreads=8)
    assert len(new_uv_ids) == len(new_costs)
    print("Reduced graph from", n_nodes, "to", n_new_nodes, "nodes;",
          n_edges, "to", len(new_uv_ids), "edges.")

    # map the new graph (= node labeling and corresponding edges)
    # to the next scale level
    f_nodes = z5py.File('./nodes_to_blocks.n5', use_zarr_format=False)
    f_nodes.create_group('s%i' % (scale + 1,))
    node_out_prefix = './nodes_to_blocks.n5/s%i/node_' % (scale + 1,)
    f_graph = z5py.File('./graph.n5', use_zarr_format=False)
    f_graph.create_group('merged_graphs/s%i' % scale)

    if scale == 0:
        block_in_prefix = './graph.n5/sub_graphs/s%i/block_' % scale
    else:
        block_in_prefix = './graph.n5/merged_graphs/s%i/block_' % scale
    block_out_prefix = './graph.n5/merged_graphs/s%i/block_' % (scale + 1,)

    edge_labeling = edge_mapping.edgeMapping()
    ndist.serializeMergedGraph(block_in_prefix, shape, block_shape, new_block_shape,
                               n_new_nodes, node_labeling, edge_labeling,
                               node_out_prefix, block_out_prefix, 8)

    return n_new_nodes, new_uv_ids, new_costs, new_initial_node_labeling
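# ---------------------------------------------------------------------------
# Minimal sketch of the reduction step above on a toy graph, using plain
# numpy instead of nifty/vigra. Everything here (names, numbers) is made up
# for illustration; only the logic mirrors 'reduce_scalelevel': merge the
# node pairs of the selected edges, relabel the nodes consecutively, project
# the edges to the new node ids, drop edges that became internal to a merged
# node and sum the costs of edges that became parallel.
def _toy_reduce(uv_ids, costs, merge_edge_ids, n_nodes):
    # plain python union-find as a stand-in for nifty.ufd.ufd
    parent = list(range(n_nodes))

    def find(x):
        while parent[x] != x:
            parent[x] = parent[parent[x]]  # path compression
            x = parent[x]
        return x

    for u, v in uv_ids[merge_edge_ids]:
        parent[find(int(u))] = find(int(v))

    # consecutive relabeling, as done by vigra.analysis.relabelConsecutive
    roots = np.array([find(i) for i in range(n_nodes)])
    _, node_labeling = np.unique(roots, return_inverse=True)

    # map edges to the new node ids; sort each pair so (u, v) == (v, u)
    mapped_uvs = np.sort(node_labeling[uv_ids], axis=1)
    keep = mapped_uvs[:, 0] != mapped_uvs[:, 1]  # drop now-internal edges
    mapped_uvs, kept_costs = mapped_uvs[keep], costs[keep]

    # accumulate the costs of parallel edges ('sum' accumulation)
    new_uv_ids, inv = np.unique(mapped_uvs, axis=0, return_inverse=True)
    new_costs = np.zeros(len(new_uv_ids))
    np.add.at(new_costs, inv, kept_costs)
    return node_labeling, new_uv_ids, new_costs


# example: path graph 0-1-2-3; merging edge (1, 2) contracts nodes 1 and 2
# _toy_reduce(np.array([[0, 1], [1, 2], [2, 3]]), np.array([1., 2., 3.]),
#             np.array([1]), 4)
# -> nodes relabeled to [0, 1, 1, 2], edges [[0, 1], [1, 2]], costs [1., 3.]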
def _serialize_new_problem(problem_path, n_new_nodes, new_uv_ids,
                           node_labeling, edge_labeling,
                           new_costs, new_initial_node_labeling,
                           shape, scale, initial_block_shape,
                           n_threads, roi_begin, roi_end):

    next_scale = scale + 1
    f_out = z5py.File(problem_path)
    g_out = f_out.require_group('s%i' % next_scale)
    g_out.require_group('sub_graphs')

    block_in_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs', 'block_')
    block_out_prefix = os.path.join(problem_path, 's%i' % next_scale, 'sub_graphs', 'block_')

    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]
    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    # NOTE we do not need to serialize the sub-edges in the current implementation
    # of the blockwise multicut workflow, because we always load the full graph
    # in 'solve_subproblems'
    # serialize the new sub-graphs
    block_ids = vu.blocks_in_volume(shape, new_block_shape, roi_begin, roi_end)
    ndist.serializeMergedGraph(graphBlockPrefix=block_in_prefix,
                               shape=shape,
                               blockShape=block_shape,
                               newBlockShape=new_block_shape,
                               newBlockIds=block_ids,
                               nodeLabeling=node_labeling,
                               edgeLabeling=edge_labeling,
                               graphOutPrefix=block_out_prefix,
                               numberOfThreads=n_threads,
                               serializeEdges=False)

    # serialize the multicut problem for the next scale level
    graph_key = 's%i/graph' % scale
    with vu.file_reader(problem_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignoreLabel']

    n_new_edges = len(new_uv_ids)
    graph_out = g_out.require_group('graph')
    graph_out.attrs['ignoreLabel'] = ignore_label
    graph_out.attrs['numberOfNodes'] = n_new_nodes
    graph_out.attrs['numberOfEdges'] = n_new_edges

    def _serialize(out_group, name, data, dtype='uint64'):
        # cap chunks at 262144 elements along the first axis
        ser_chunks = (min(data.shape[0], 262144), 2) if data.ndim == 2 \
            else (min(data.shape[0], 262144),)
        ds_ser = out_group.require_dataset(name, dtype=dtype, shape=data.shape,
                                           chunks=ser_chunks, compression='gzip')
        ds_ser.n_threads = n_threads
        ds_ser[:] = data

    # NOTE we do not need to serialize the nodes, because they are
    # consecutive anyway
    # _serialize('nodes', np.arange(n_new_nodes).astype('uint64'))

    # serialize the new graph, the node labeling and the new costs
    _serialize(graph_out, 'edges', new_uv_ids)
    _serialize(g_out, 'node_labeling', new_initial_node_labeling)
    _serialize(g_out, 'costs', new_costs, dtype='float32')
    return n_new_edges
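# ---------------------------------------------------------------------------
# Quick sketch of what the '_serialize' helper writes, assuming z5py is
# installed; the container path and values are throwaway examples. The chunk
# size 262144 (= 2**18) caps the number of elements per chunk along the first
# axis, while 2d arrays (the uv-ids) keep their second axis in a single chunk.
def _example_serialize(path='./example_problem.n5', n_threads=1):
    edges = np.array([[0, 1], [0, 2], [1, 2]], dtype='uint64')
    g = z5py.File(path).require_group('s1').require_group('graph')
    chunks = (min(edges.shape[0], 262144), 2)
    ds = g.require_dataset('edges', dtype='uint64', shape=edges.shape,
                           chunks=chunks, compression='gzip')
    ds.n_threads = n_threads  # z5py reads and writes chunks in parallel
    ds[:] = edges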
def serialize_new_problem(graph_path, n_new_nodes, new_uv_ids,
                          node_labeling, edge_labeling,
                          new_costs, new_initial_node_labeling,
                          shape, scale, initial_block_shape,
                          tmp_folder, n_threads):

    next_scale = scale + 1
    merged_graph_path = os.path.join(tmp_folder, 'merged_graph.n5')
    f_graph = z5py.File(merged_graph_path, use_zarr_format=False)
    g_out = f_graph.create_group('s%i' % next_scale)
    g_out.create_group('sub_graphs')

    # TODO this should be handled by symlinks
    if scale == 0:
        block_in_prefix = os.path.join(graph_path, 'sub_graphs', 's%i' % scale, 'block_')
    else:
        block_in_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                       's%i' % scale, 'sub_graphs', 'block_')

    block_out_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                    's%i' % next_scale, 'sub_graphs', 'block_')

    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]
    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    ndist.serializeMergedGraph(block_in_prefix, shape,
                               block_shape, new_block_shape,
                               n_new_nodes,
                               node_labeling, edge_labeling,
                               block_out_prefix, n_threads)

    # serialize the full graph for the next scale level
    n_new_edges = len(new_uv_ids)
    g_out.attrs['numberOfNodes'] = n_new_nodes
    g_out.attrs['numberOfEdges'] = n_new_edges

    shape_edges = (n_new_edges, 2)
    ds_edges = g_out.create_dataset('edges', dtype='uint64',
                                    shape=shape_edges, chunks=shape_edges)
    ds_edges[:] = new_uv_ids

    nodes = np.unique(new_uv_ids)
    shape_nodes = (len(nodes),)
    ds_nodes = g_out.create_dataset('nodes', dtype='uint64',
                                    shape=shape_nodes, chunks=shape_nodes)
    ds_nodes[:] = nodes

    # serialize the node labeling
    shape_node_labeling = (len(new_initial_node_labeling),)
    ds_node_labeling = g_out.create_dataset('nodeLabeling', dtype='uint64',
                                            shape=shape_node_labeling,
                                            chunks=shape_node_labeling)
    ds_node_labeling[:] = new_initial_node_labeling

    # serialize the new costs
    shape_costs = (n_new_edges,)
    if 'costs' not in g_out:
        ds_costs = g_out.create_dataset('costs', dtype='float32',
                                        shape=shape_costs, chunks=shape_costs)
    else:
        ds_costs = g_out['costs']
    ds_costs[:] = new_costs

    return n_new_edges
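# ---------------------------------------------------------------------------
# The block shapes used above double with every scale level: scale s uses
# 2**s times the initial block shape, so one block at scale s + 1 covers
# eight blocks at scale s (in 3d). A small illustration with a made-up
# initial block shape:
def _example_block_shapes(initial_block_shape=(50, 512, 512), n_scales=3):
    return [[2**scale * bs for bs in initial_block_shape]
            for scale in range(n_scales)]


# _example_block_shapes()
# -> [[50, 512, 512], [100, 1024, 1024], [200, 2048, 2048]]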
def _serialize_new_problem(problem_path, n_new_nodes, new_uv_ids,
                           node_labeling, edge_labeling,
                           new_costs, new_initial_node_labeling,
                           new_lifted_uvs, new_lifted_costs,
                           shape, scale, initial_block_shape,
                           n_threads, roi_begin, roi_end, lifted_prefix):
    assert len(new_costs) == len(new_uv_ids)
    assert len(new_lifted_uvs) == len(new_lifted_costs)
    next_scale = scale + 1
    f_out = z5py.File(problem_path)
    g_out = f_out.require_group('s%i' % next_scale)

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5 container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    g_out.require_group(sub_graph_identifier)
    subgraph_in_key = 's%i/%s' % (scale, sub_graph_identifier)
    subgraph_out_key = 's%i/sub_graphs_lmc' % next_scale

    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]
    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    # NOTE we do not need to serialize the sub-edges in the current implementation
    # of the blockwise multicut workflow, because we always load the full graph
    # in 'solve_subproblems'
    # serialize the new sub-graphs
    block_ids = vu.blocks_in_volume(shape, new_block_shape, roi_begin, roi_end)
    ndist.serializeMergedGraph(graphPath=problem_path,
                               graphBlockPrefix=subgraph_in_key,
                               shape=shape,
                               blockShape=block_shape,
                               newBlockShape=new_block_shape,
                               newBlockIds=block_ids,
                               nodeLabeling=node_labeling,
                               edgeLabeling=edge_labeling,
                               outPath=problem_path,
                               graphOutPrefix=subgraph_out_key,
                               numberOfThreads=n_threads,
                               serializeEdges=False)

    # serialize the lifted multicut problem for the next scale level
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    with vu.file_reader(problem_path, 'r') as f:
        ignore_label = f[graph_key].attrs['ignore_label']

    n_new_edges = len(new_uv_ids)
    graph_out = g_out.require_group('graph_lmc')
    graph_out.attrs['ignore_label'] = ignore_label
    graph_out.attrs['numberOfNodes'] = n_new_nodes
    graph_out.attrs['numberOfEdges'] = n_new_edges
    graph_out.attrs['shape'] = shape

    def _serialize(out_group, name, data, dtype='uint64'):
        # cap chunks at 262144 elements along the first axis
        ser_chunks = (min(data.shape[0], 262144), 2) if data.ndim == 2 \
            else (min(data.shape[0], 262144),)
        ds_ser = out_group.require_dataset(name, dtype=dtype, shape=data.shape,
                                           chunks=ser_chunks, compression='gzip')
        ds_ser.n_threads = n_threads
        ds_ser[:] = data

    # NOTE we do not need to serialize the nodes, because they are
    # consecutive anyway
    # _serialize('nodes', np.arange(n_new_nodes).astype('uint64'))

    # serialize the new graph, the node labeling and the new costs
    _serialize(graph_out, 'edges', new_uv_ids)
    _serialize(g_out, 'node_labeling_lmc', new_initial_node_labeling)
    _serialize(g_out, 'costs_lmc', new_costs, dtype='float32')

    # serialize the lifted uv-ids and the lifted costs
    _serialize(g_out, 'lifted_nh_%s' % lifted_prefix, new_lifted_uvs)
    _serialize(g_out, 'lifted_costs_%s' % lifted_prefix, new_lifted_costs, dtype='float32')
    return n_new_edges
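# ---------------------------------------------------------------------------
# The lifted uv-ids and costs serialized above were reduced under the same
# node labeling as the regular graph. This plain-numpy helper illustrates
# that reduction (it is not the production code): lifted edges whose
# endpoints fall into the same merged node vanish, and parallel lifted
# edges have their costs summed.
def _toy_reduce_lifted(lifted_uvs, lifted_costs, node_labeling):
    mapped = np.sort(node_labeling[lifted_uvs], axis=1)
    keep = mapped[:, 0] != mapped[:, 1]  # drop now-internal lifted edges
    mapped, kept_costs = mapped[keep], lifted_costs[keep]
    new_lifted_uvs, inv = np.unique(mapped, axis=0, return_inverse=True)
    new_lifted_costs = np.zeros(len(new_lifted_uvs))
    np.add.at(new_lifted_costs, inv, kept_costs)
    return new_lifted_uvs, new_lifted_costs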
def serialize_new_problem(graph_path, n_new_nodes, new_uv_ids,
                          node_labeling, edge_labeling,
                          new_costs, new_initial_node_labeling,
                          shape, scale, initial_block_shape,
                          tmp_folder, n_threads, roi):

    next_scale = scale + 1
    merged_graph_path = os.path.join(tmp_folder, 'merged_graph.n5')
    f_graph = z5py.File(merged_graph_path, use_zarr_format=False)
    g_out = f_graph.require_group('s%i' % next_scale)
    g_out.require_group('sub_graphs')

    # TODO this should be handled by symlinks
    if scale == 0:
        block_in_prefix = os.path.join(graph_path, 'sub_graphs', 's%i' % scale, 'block_')
    else:
        block_in_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                       's%i' % scale, 'sub_graphs', 'block_')

    block_out_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                    's%i' % next_scale, 'sub_graphs', 'block_')

    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]
    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    shape = z5py.File(graph_path).attrs['shape']
    blocking = nifty.tools.blocking([0, 0, 0], shape, new_block_shape)
    if roi is None:
        new_block_ids = list(range(blocking.numberOfBlocks))
    else:
        new_block_ids = blocking.getBlockIdsOverlappingBoundingBox(roi[0], roi[1],
                                                                   [0, 0, 0]).tolist()

    print("Serializing new graph")
    ndist.serializeMergedGraph(block_in_prefix, shape,
                               block_shape, new_block_shape,
                               new_block_ids, n_new_nodes,
                               node_labeling, edge_labeling,
                               block_out_prefix, n_threads)

    # serialize the full graph for the next scale level
    n_new_edges = len(new_uv_ids)
    g_out.attrs['numberOfNodes'] = n_new_nodes
    g_out.attrs['numberOfEdges'] = n_new_edges

    shape_edges = (n_new_edges, 2)
    ds_edges = g_out.require_dataset('edges', dtype='uint64',
                                     shape=shape_edges, chunks=shape_edges)
    ds_edges.n_threads = n_threads
    ds_edges[:] = new_uv_ids

    nodes = np.unique(new_uv_ids)
    shape_nodes = (len(nodes),)
    ds_nodes = g_out.require_dataset('nodes', dtype='uint64',
                                     shape=shape_nodes, chunks=shape_nodes)
    ds_nodes.n_threads = n_threads
    ds_nodes[:] = nodes

    # serialize the node labeling
    # (require_dataset, so that a rerun of this step does not fail)
    shape_node_labeling = (len(new_initial_node_labeling),)
    ds_node_labeling = g_out.require_dataset('nodeLabeling', dtype='uint64',
                                             shape=shape_node_labeling,
                                             chunks=shape_node_labeling)
    ds_node_labeling[:] = new_initial_node_labeling

    # serialize the new costs
    shape_costs = (n_new_edges,)
    ds_costs = g_out.require_dataset('costs', dtype='float32',
                                     shape=shape_costs, chunks=shape_costs)
    ds_costs.n_threads = n_threads
    ds_costs[:] = new_costs

    return n_new_edges
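# ---------------------------------------------------------------------------
# Sketch of the roi handling above: which block ids does a bounding box
# overlap? This pure-python stand-in assumes C-order (last axis fastest)
# block ids over a blocking that starts at the origin; all values are made
# up for the example, the production code uses nifty.tools.blocking.
def _blocks_overlapping_roi(shape, block_shape, roi_begin, roi_end):
    import itertools
    # number of blocks per axis (border blocks may be smaller)
    nb = [(sh + bs - 1) // bs for sh, bs in zip(shape, block_shape)]
    # range of block coordinates the roi touches along each axis
    ranges = [range(rb // bs, min((re - 1) // bs + 1, n))
              for rb, re, bs, n in zip(roi_begin, roi_end, block_shape, nb)]
    # flatten the 3d block coordinates into scan-order block ids
    return [(c[0] * nb[1] + c[1]) * nb[2] + c[2]
            for c in itertools.product(*ranges)]


# _blocks_overlapping_roi([100, 100, 100], [50, 50, 50], [0, 0, 0], [60, 40, 40])
# -> [0, 4]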