# 示例#1 (Example #1)
# 0
def reduce_scalelevel(scale, n_nodes, uv_ids, costs,
                      merge_edge_ids,
                      initial_node_labeling,
                      shape, block_shape, new_block_shape,
                      cost_accumulation="sum", n_threads=8):
    """Contract the edges given by `merge_edge_ids` and serialize the
    reduced graph for the next scale level.

    Arguments:
        scale [int] - current scale level
        n_nodes [int] - number of nodes in the current graph
        uv_ids [np.ndarray] - edge uv-ids of the current graph
        costs [np.ndarray] - edge costs of the current graph
        merge_edge_ids [np.ndarray] - ids of the edges to be contracted
        initial_node_labeling [np.ndarray] - labeling from initial nodes to
            current nodes (None at the first scale level)
        shape [tuple] - shape of the full volume
        block_shape [tuple] - block shape at the current scale level
        new_block_shape [tuple] - block shape at the next scale level
        cost_accumulation [str] - accumulation mode used when mapping edge
            costs to the reduced graph (default: "sum")
        n_threads [int] - number of threads for nifty / ndist calls
            (default: 8, the previously hard-coded value)

    Returns:
        tuple - (n_new_nodes, new_uv_ids, new_costs, new_initial_node_labeling)
    """
    n_edges = len(uv_ids)
    # merge node pairs with a union-find datastructure
    ufd = nifty.ufd.ufd(n_nodes)
    merge_pairs = uv_ids[merge_edge_ids]
    ufd.merge(merge_pairs)

    # get the node results and label them consecutively
    node_labeling = ufd.elementLabeling()
    node_labeling, max_new_id, _ = vigra.analysis.relabelConsecutive(node_labeling)
    n_new_nodes = max_new_id + 1

    # get the labeling of initial nodes
    if initial_node_labeling is None:
        new_initial_node_labeling = node_labeling
    else:
        # should this ever become a bottleneck, we can parallelize this in nifty
        # but for now this would really be premature optimization
        new_initial_node_labeling = node_labeling[initial_node_labeling]

    # get new edge costs
    edge_mapping = nifty.tools.EdgeMapping(uv_ids, node_labeling,
                                           numberOfThreads=n_threads)
    new_uv_ids = edge_mapping.newUvIds()

    new_costs = edge_mapping.mapEdgeValues(costs, cost_accumulation,
                                           numberOfThreads=n_threads)
    assert len(new_uv_ids) == len(new_costs)

    print("Reduced graph from", n_nodes, "to", n_new_nodes, "nodes;",
          n_edges, "to", len(new_uv_ids), "edges.")

    # map the new graph (= node labeling and corresponding edges)
    # to the next scale level
    f_nodes = z5py.File('./nodes_to_blocks.n5', use_zarr_format=False)
    f_nodes.create_group('s%i' % (scale + 1,))
    node_out_prefix = './nodes_to_blocks.n5/s%i/node_' % (scale + 1,)
    f_graph = z5py.File('./graph.n5', use_zarr_format=False)
    f_graph.create_group('merged_graphs/s%i' % scale)
    # at scale 0 the sub-graphs come from the initial graph extraction,
    # afterwards from the previous merge step
    if scale == 0:
        block_in_prefix = './graph.n5/sub_graphs/s%i/block_' % scale
    else:
        block_in_prefix = './graph.n5/merged_graphs/s%i/block_' % scale

    block_out_prefix = './graph.n5/merged_graphs/s%i/block_' % (scale + 1,)

    edge_labeling = edge_mapping.edgeMapping()
    ndist.serializeMergedGraph(block_in_prefix, shape,
                               block_shape, new_block_shape,
                               n_new_nodes,
                               node_labeling, edge_labeling,
                               node_out_prefix, block_out_prefix, n_threads)

    return n_new_nodes, new_uv_ids, new_costs, new_initial_node_labeling
# 示例#2 (Example #2)
# 0
def _serialize_new_problem(problem_path, n_new_nodes, new_uv_ids,
                           node_labeling, edge_labeling, new_costs,
                           new_initial_node_labeling, shape, scale,
                           initial_block_shape, n_threads, roi_begin, roi_end):
    """Serialize the reduced multicut problem for the next scale level.

    Writes the merged sub-graphs via ndist as well as the full reduced
    graph (edges, node labeling, costs) into `problem_path` and returns
    the number of edges of the reduced graph.
    """
    target_scale = scale + 1
    out_file = z5py.File(problem_path)
    scale_group = out_file.require_group('s%i' % target_scale)
    scale_group.require_group('sub_graphs')

    in_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs',
                             'block_')
    out_prefix = os.path.join(problem_path, 's%i' % target_scale,
                              'sub_graphs', 'block_')

    # block shapes double along each axis per scale level
    cur_block_shape = [(2 ** scale) * bs for bs in initial_block_shape]
    next_block_shape = [(2 ** target_scale) * bs for bs in initial_block_shape]

    # NOTE we do not need to serialize the sub-edges in the current implementation
    # of the blockwise multicut workflow, because we always load the full graph
    # in 'solve_subproblems'

    # serialize the new sub-graphs
    new_block_ids = vu.blocks_in_volume(shape, next_block_shape,
                                        roi_begin, roi_end)
    ndist.serializeMergedGraph(graphBlockPrefix=in_prefix,
                               shape=shape,
                               blockShape=cur_block_shape,
                               newBlockShape=next_block_shape,
                               newBlockIds=new_block_ids,
                               nodeLabeling=node_labeling,
                               edgeLabeling=edge_labeling,
                               graphOutPrefix=out_prefix,
                               numberOfThreads=n_threads,
                               serializeEdges=False)

    # serialize the multicut problem for the next scale level

    with vu.file_reader(problem_path, 'r') as f:
        ignore_label = f['s%i/graph' % scale].attrs['ignoreLabel']

    n_new_edges = len(new_uv_ids)
    graph_group = scale_group.require_group('graph')
    graph_group.attrs['ignoreLabel'] = ignore_label
    graph_group.attrs['numberOfNodes'] = n_new_nodes
    graph_group.attrs['numberOfEdges'] = n_new_edges

    def _write(group, name, values, dtype='uint64'):
        # chunk along the first axis, capped at 262144 entries per chunk
        chunk_len = min(values.shape[0], 262144)
        chunks = (chunk_len, 2) if values.ndim == 2 else (chunk_len,)
        ds = group.require_dataset(name,
                                   dtype=dtype,
                                   shape=values.shape,
                                   chunks=chunks,
                                   compression='gzip')
        ds.n_threads = n_threads
        ds[:] = values

    # NOTE we don not need to serialize the nodes cause they are
    # consecutive anyway
    # _serialize('nodes', np.arange(n_new_nodes).astype('uint64'))

    # serialize the new graph, the node labeling and the new costs
    _write(graph_group, 'edges', new_uv_ids)
    _write(scale_group, 'node_labeling', new_initial_node_labeling)
    _write(scale_group, 'costs', new_costs, dtype='float32')

    return n_new_edges
def serialize_new_problem(graph_path, n_new_nodes, new_uv_ids, node_labeling,
                          edge_labeling, new_costs, new_initial_node_labeling,
                          shape, scale, initial_block_shape, tmp_folder,
                          n_threads):
    """Serialize the reduced graph, node labeling and costs for the
    next scale level into 'merged_graph.n5' in `tmp_folder`.

    Returns:
        int - number of edges of the reduced graph
    """
    next_scale = scale + 1
    merged_graph_path = os.path.join(tmp_folder, 'merged_graph.n5')
    f_graph = z5py.File(merged_graph_path, use_zarr_format=False)
    # use require_group / require_dataset so that a re-run (e.g. after a
    # partial failure) does not fail on already existing outputs; previously
    # only the 'costs' dataset guarded against existing data
    g_out = f_graph.require_group('s%i' % next_scale)
    g_out.require_group('sub_graphs')

    # TODO this should be handled by symlinks
    # scale 0 sub-graphs come from the initial graph extraction,
    # later scales from the previous merge step
    if scale == 0:
        block_in_prefix = os.path.join(graph_path, 'sub_graphs', 's%i' % scale,
                                       'block_')
    else:
        block_in_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                       's%i' % scale, 'sub_graphs', 'block_')

    block_out_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                    's%i' % next_scale, 'sub_graphs', 'block_')

    # block shapes double along each axis per scale level
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    ndist.serializeMergedGraph(block_in_prefix, shape, block_shape,
                               new_block_shape, n_new_nodes, node_labeling,
                               edge_labeling, block_out_prefix, n_threads)

    # serialize the full graph for the next scale level
    n_new_edges = len(new_uv_ids)
    g_out.attrs['numberOfNodes'] = n_new_nodes
    g_out.attrs['numberOfEdges'] = n_new_edges

    shape_edges = (n_new_edges, 2)
    ds_edges = g_out.require_dataset('edges',
                                     dtype='uint64',
                                     shape=shape_edges,
                                     chunks=shape_edges)
    ds_edges[:] = new_uv_ids

    nodes = np.unique(new_uv_ids)
    shape_nodes = (len(nodes), )
    ds_nodes = g_out.require_dataset('nodes',
                                     dtype='uint64',
                                     shape=shape_nodes,
                                     chunks=shape_nodes)
    ds_nodes[:] = nodes

    # serialize the node labeling
    shape_node_labeling = (len(new_initial_node_labeling), )
    ds_node_labeling = g_out.require_dataset('nodeLabeling',
                                             dtype='uint64',
                                             shape=shape_node_labeling,
                                             chunks=shape_node_labeling)
    ds_node_labeling[:] = new_initial_node_labeling

    # serialize the new costs
    shape_costs = (n_new_edges, )
    ds_costs = g_out.require_dataset('costs',
                                     dtype='float32',
                                     shape=shape_costs,
                                     chunks=shape_costs)
    ds_costs[:] = new_costs

    return n_new_edges
def _serialize_new_problem(problem_path, n_new_nodes, new_uv_ids,
                           node_labeling, edge_labeling, new_costs,
                           new_initial_node_labeling, new_lifted_uvs,
                           new_lifted_costs, shape, scale, initial_block_shape,
                           n_threads, roi_begin, roi_end, lifted_prefix):
    """Serialize the reduced lifted multicut problem for the next scale level.

    Writes the merged sub-graphs (via ndist) as well as the reduced graph
    (edges, node labeling, costs) and the reduced lifted edges / costs into
    the n5 container at `problem_path`.

    Arguments:
        problem_path [str] - path to the n5 container holding the problem
        n_new_nodes [int] - number of nodes of the reduced graph
        new_uv_ids [np.ndarray] - edge uv-ids of the reduced graph
        node_labeling [np.ndarray] - labeling from current to reduced nodes
        edge_labeling [np.ndarray] - labeling from current to reduced edges
        new_costs [np.ndarray] - costs of the reduced graph edges
        new_initial_node_labeling [np.ndarray] - labeling from initial to
            reduced nodes
        new_lifted_uvs [np.ndarray] - uv-ids of the reduced lifted edges
        new_lifted_costs [np.ndarray] - costs of the reduced lifted edges
        shape [tuple] - shape of the full volume
        scale [int] - current scale level
        initial_block_shape [tuple] - block shape at scale 0
        n_threads [int] - number of threads
        roi_begin - start of the region of interest (or None)
        roi_end - end of the region of interest (or None)
        lifted_prefix [str] - identifier appended to the lifted dataset names

    Returns:
        int - number of edges of the reduced graph
    """
    assert len(new_costs) == len(new_uv_ids)
    assert len(new_lifted_uvs) == len(new_lifted_costs)
    next_scale = scale + 1
    f_out = z5py.File(problem_path)
    g_out = f_out.require_group('s%i' % next_scale)

    # NOTE we use different sub-graph identifiers for multicut and lifted multicut
    # in order to run both in the same n5-container.
    # However, for scale level 0 the sub-graphs come from the GraphWorkflow and
    # are hence identical
    sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc'
    g_out.require_group(sub_graph_identifier)

    subgraph_in_key = 's%i/%s' % (scale, sub_graph_identifier)
    subgraph_out_key = 's%i/sub_graphs_lmc' % next_scale

    # block shapes double along each axis per scale level
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    # NOTE we do not need to serialize the sub-edges in the current implementation
    # of the blockwise multicut workflow, because we always load the full graph
    # in 'solve_subproblems'

    # serialize the new sub-graphs
    block_ids = vu.blocks_in_volume(shape, new_block_shape, roi_begin, roi_end)
    ndist.serializeMergedGraph(graphPath=problem_path,
                               graphBlockPrefix=subgraph_in_key,
                               shape=shape,
                               blockShape=block_shape,
                               newBlockShape=new_block_shape,
                               newBlockIds=block_ids,
                               nodeLabeling=node_labeling,
                               edgeLabeling=edge_labeling,
                               outPath=problem_path,
                               graphOutPrefix=subgraph_out_key,
                               numberOfThreads=n_threads,
                               serializeEdges=False)

    # serialize the multicut problem for the next scale level

    # scale 0 graph comes from the plain multicut workflow ('graph'),
    # later scales from the lifted workflow ('graph_lmc')
    graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph'
    with vu.file_reader(problem_path, 'r') as f:
        # NOTE(review): attribute key is 'ignore_label' here, while other
        # variants in this codebase read/write 'ignoreLabel' — confirm the
        # producing workflow actually uses this spelling
        ignore_label = f[graph_key].attrs['ignore_label']

    n_new_edges = len(new_uv_ids)
    graph_out = g_out.require_group('graph_lmc')
    graph_out.attrs['ignore_label'] = ignore_label
    graph_out.attrs['numberOfNodes'] = n_new_nodes
    graph_out.attrs['numberOfEdges'] = n_new_edges
    graph_out.attrs['shape'] = shape

    def _serialize(out_group, name, data, dtype='uint64'):
        # chunk along the first axis, capped at 262144 entries per chunk
        ser_chunks = (min(data.shape[0], 262144), 2) if data.ndim == 2 else\
            (min(data.shape[0], 262144),)
        ds_ser = out_group.require_dataset(name,
                                           dtype=dtype,
                                           shape=data.shape,
                                           chunks=ser_chunks,
                                           compression='gzip')
        ds_ser.n_threads = n_threads
        ds_ser[:] = data

    # NOTE we don not need to serialize the nodes cause they are
    # consecutive anyway
    # _serialize('nodes', np.arange(n_new_nodes).astype('uint64'))

    # serialize the new graph, the node labeling and the new costs
    _serialize(graph_out, 'edges', new_uv_ids)
    _serialize(g_out, 'node_labeling_lmc', new_initial_node_labeling)
    _serialize(g_out, 'costs_lmc', new_costs, dtype='float32')
    # serialize lifted uvs and costs
    _serialize(g_out, 'lifted_nh_%s' % lifted_prefix, new_lifted_uvs)
    _serialize(g_out,
               'lifted_costs_%s' % lifted_prefix,
               new_lifted_costs,
               dtype='float32')

    return n_new_edges
def serialize_new_problem(graph_path, n_new_nodes, new_uv_ids,
                          node_labeling, edge_labeling,
                          new_costs, new_initial_node_labeling,
                          shape, scale, initial_block_shape,
                          tmp_folder, n_threads, roi):
    """Serialize the reduced graph, node labeling and costs for the
    next scale level into 'merged_graph.n5' in `tmp_folder`, restricted
    to `roi` if given.

    Returns:
        int - number of edges of the reduced graph
    """
    next_scale = scale + 1
    merged_graph_path = os.path.join(tmp_folder, 'merged_graph.n5')
    f_graph = z5py.File(merged_graph_path, use_zarr_format=False)
    g_out = f_graph.require_group('s%i' % next_scale)
    g_out.require_group('sub_graphs')

    # TODO this should be handled by symlinks
    # scale 0 sub-graphs come from the initial graph extraction,
    # later scales from the previous merge step
    if scale == 0:
        block_in_prefix = os.path.join(graph_path, 'sub_graphs',
                                       's%i' % scale, 'block_')
    else:
        block_in_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                       's%i' % scale, 'sub_graphs', 'block_')

    block_out_prefix = os.path.join(tmp_folder, 'merged_graph.n5',
                                    's%i' % next_scale, 'sub_graphs', 'block_')

    # block shapes double along each axis per scale level
    factor = 2**scale
    block_shape = [factor * bs for bs in initial_block_shape]

    new_factor = 2**(scale + 1)
    new_block_shape = [new_factor * bs for bs in initial_block_shape]

    # NOTE(review): this overwrites the `shape` argument with the shape
    # stored in the graph attributes — presumably the stored value is the
    # authoritative one; confirm the parameter is still needed
    shape = z5py.File(graph_path).attrs['shape']
    blocking = nifty.tools.blocking([0, 0, 0], shape, new_block_shape)
    if roi is None:
        new_block_ids = list(range(blocking.numberOfBlocks))
    else:
        new_block_ids = blocking.getBlockIdsOverlappingBoundingBox(roi[0], roi[1],
                                                                   [0, 0, 0]).tolist()

    print("Serializing new graph")
    ndist.serializeMergedGraph(block_in_prefix, shape,
                               block_shape, new_block_shape,
                               new_block_ids, n_new_nodes,
                               node_labeling, edge_labeling,
                               block_out_prefix, n_threads)

    # serialize the full graph for the next scale level
    n_new_edges = len(new_uv_ids)
    g_out.attrs['numberOfNodes'] = n_new_nodes
    g_out.attrs['numberOfEdges'] = n_new_edges

    shape_edges = (n_new_edges, 2)
    ds_edges = g_out.require_dataset('edges', dtype='uint64',
                                     shape=shape_edges, chunks=shape_edges)
    ds_edges.n_threads = n_threads
    ds_edges[:] = new_uv_ids

    nodes = np.unique(new_uv_ids)
    shape_nodes = (len(nodes),)
    ds_nodes = g_out.require_dataset('nodes', dtype='uint64',
                                     shape=shape_nodes, chunks=shape_nodes)
    ds_nodes.n_threads = n_threads
    ds_nodes[:] = nodes

    # serialize the node labeling
    # use require_dataset (was create_dataset) and set n_threads for
    # consistency with the other datasets here; create_dataset would fail
    # when re-running with existing outputs
    shape_node_labeling = (len(new_initial_node_labeling),)
    ds_node_labeling = g_out.require_dataset('nodeLabeling', dtype='uint64',
                                             shape=shape_node_labeling,
                                             chunks=shape_node_labeling)
    ds_node_labeling.n_threads = n_threads
    ds_node_labeling[:] = new_initial_node_labeling

    # serialize the new costs
    shape_costs = (n_new_edges,)
    ds_costs = g_out.require_dataset('costs', dtype='float32',
                                     shape=shape_costs, chunks=shape_costs)
    ds_costs.n_threads = n_threads
    ds_costs[:] = new_costs

    return n_new_edges