def solve_subproblems(job_id, config_path): fu.log("start processing job %i" % job_id) fu.log("reading config from %s" % config_path) # get the config with open(config_path) as f: config = json.load(f) # input configs problem_path = config['problem_path'] scale = config['scale'] block_shape = config['block_shape'] block_list = config['block_list'] n_threads = config['threads_per_job'] agglomerator_key = config['agglomerator'] time_limit = config.get('time_limit_solver', None) fu.log("reading problem from %s" % problem_path) problem = z5py.N5File(problem_path) shape = problem.attrs['shape'] # load the costs costs_key = 's%i/costs' % scale fu.log("reading costs from path in problem: %s" % costs_key) ds = problem[costs_key] ds.n_threads = n_threads costs = ds[:] # load the graph graph_key = 's%i/graph' % scale fu.log("reading graph from path in problem: %s" % graph_key) graph = ndist.Graph(os.path.join(problem_path, graph_key), numberOfThreads=n_threads) uv_ids = graph.uvIds() # check if the problem has an ignore-label ignore_label = problem[graph_key].attrs['ignoreLabel'] fu.log("ignore label is %s" % ('true' if ignore_label else 'false')) fu.log("using agglomerator %s" % agglomerator_key) agglomerator = su.key_to_agglomerator(agglomerator_key) # the output group out = problem['s%i/sub_results' % scale] # TODO this should be a n5 varlen dataset as well and # then this is just another dataset in problem path block_prefix = os.path.join(problem_path, 's%i' % scale, 'sub_graphs', 'block_') blocking = nt.blocking([0, 0, 0], shape, list(block_shape)) with futures.ThreadPoolExecutor(n_threads) as tp: tasks = [ tp.submit(_solve_block_problem, block_id, graph, uv_ids, block_prefix, costs, agglomerator, ignore_label, blocking, out, time_limit) for block_id in block_list ] [t.result() for t in tasks] fu.log_job_success(job_id)
def solve_subproblems(job_id, config_path): fu.log("start processing job %i" % job_id) fu.log("reading config from %s" % config_path) # get the config with open(config_path) as f: config = json.load(f) # input configs costs_path = config['costs_path'] costs_key = config['costs_key'] graph_path = config['graph_path'] graph_key = config['graph_key'] decomposition_path = config['decomposition_path'] tmp_folder = config['tmp_folder'] component_list = config['block_list'] n_threads = config['threads_per_job'] agglomerator_key = config['agglomerator'] with vu.file_reader(costs_path, 'r') as f: ds = f[costs_key] ds.n_threads = n_threads costs = ds[:] with vu.file_reader(decomposition_path, 'r') as f: ds = f['graph_labels'] ds.n_threads = n_threads graph_labels = ds[:] # load the graph graph = ndist.Graph(os.path.join(graph_path, graph_key), numberOfThreads=n_threads) uv_ids = graph.uvIds() agglomerator = su.key_to_agglomerator(agglomerator_key) with futures.ThreadPoolExecutor(n_threads) as tp: tasks = [ tp.submit(_solve_component, component_id, graph, uv_ids, graph_labels, costs, agglomerator) for component_id in component_list ] results = [t.result() for t in tasks] cut_edge_ids = np.concatenate([res for res in results if res is not None]) cut_edge_ids = np.unique(cut_edge_ids) res_folder = os.path.join(tmp_folder, 'subproblem_results') job_res_path = os.path.join(res_folder, 'job%i.npy' % job_id) fu.log("saving cut edge results to %s" % job_res_path) np.save(job_res_path, cut_edge_ids) fu.log_job_success(job_id)
def solve_global(job_id, config_path): fu.log("start processing job %i" % job_id) fu.log("reading config from %s" % config_path) # get the config with open(config_path) as f: config = json.load(f) # path to the reduced problem problem_path = config['problem_path'] # path where the node labeling shall be written assignment_path = config['assignment_path'] assignment_key = config['assignment_key'] scale = config['scale'] agglomerator_key = config['agglomerator'] n_threads = config['threads_per_job'] time_limit = config.get('time_limit_solver', None) fu.log("using agglomerator %s" % agglomerator_key) if time_limit is None: fu.log("agglomeration without time limit") else: fu.log("agglomeration time limit %i" % time_limit) agglomerator = su.key_to_agglomerator(agglomerator_key) with vu.file_reader(problem_path, 'r') as f: group = f['s%i' % scale] graph_group = group['graph'] ignore_label = graph_group.attrs['ignoreLabel'] ds = graph_group['edges'] ds.n_threads = n_threads uv_ids = ds[:] n_edges = len(uv_ids) # we only need to load the initial node labeling if at # least one reduction step was performed i.e. scale > 0 if scale > 0: ds = group['node_labeling'] ds.n_threads = n_threads initial_node_labeling = ds[:] ds = group['costs'] ds.n_threads = n_threads costs = ds[:] assert len(costs) == n_edges, "%i, %i" (len(costs), n_edges) n_nodes = int(uv_ids.max()) + 1 fu.log("creating graph with %i nodes an %i edges" % (n_nodes, len(uv_ids))) graph = nifty.graph.undirectedGraph(n_nodes) graph.insertEdges(uv_ids) fu.log("start agglomeration") node_labeling = agglomerator(graph, costs, n_threads=n_threads, time_limit=time_limit) fu.log("finished agglomeration") # get the labeling of initial nodes if scale > 0: initial_node_labeling = node_labeling[initial_node_labeling] else: initial_node_labeling = node_labeling n_nodes = len(initial_node_labeling) # make sure zero is mapped to 0 if we have an ignore label if ignore_label and initial_node_labeling[0] != 0: new_max_label = int(node_labeling.max() + 1) initial_node_labeling[initial_node_labeling == 0] = new_max_label initial_node_labeling[0] = 0 node_shape = (n_nodes,) chunks = (min(n_nodes, 524288),) with vu.file_reader(assignment_path) as f: ds = f.require_dataset(assignment_key, dtype='uint64', shape=node_shape, chunks=chunks, compression='gzip') ds.n_threads = n_threads ds[:] = initial_node_labeling fu.log('saving results to %s:%s' % (assignment_path, assignment_key)) fu.log_job_success(job_id)
def solve_lifted_subproblems(job_id, config_path): fu.log("start processing job %i" % job_id) fu.log("reading config from %s" % config_path) # get the config with open(config_path) as f: config = json.load(f) # input configs problem_path = config['problem_path'] scale = config['scale'] block_shape = config['block_shape'] block_list = config['block_list'] lifted_prefix = config['lifted_prefix'] agglomerator_key = config['agglomerator'] time_limit = config.get('time_limit_solver', None) n_threads = config.get('threads_per_job', 1) fu.log("reading problem from %s" % problem_path) problem = z5py.N5File(problem_path) shape = problem.attrs['shape'] # load the costs # NOTE we use different cost identifiers for multicut and lifted multicut # in order to run both in the same n5-container. # However, for scale level 0 the costs come from the CostsWorkflow and # hence the identifier is identical costs_key = 's%i/costs_lmc' % scale if scale > 0 else 's0/costs' fu.log("reading costs from path in problem: %s" % costs_key) ds = problem[costs_key] ds.n_threads = n_threads costs = ds[:] # load the graph # NOTE we use different graph identifiers for multicut and lifted multicut # in order to run both in the same n5-container. # However, for scale level 0 the graph comes from the GraphWorkflow and # hence the identifier is identical graph_key = 's%i/graph_lmc' % scale if scale > 0 else 's0/graph' fu.log("reading graph from path in problem: %s" % graph_key) graph = ndist.Graph(os.path.join(problem_path, graph_key), numberOfThreads=n_threads) uv_ids = graph.uvIds() # check if the problem has an ignore-label ignore_label = problem[graph_key].attrs['ignoreLabel'] fu.log("ignore label is %s" % ('true' if ignore_label else 'false')) fu.log("using agglomerator %s" % agglomerator_key) lifted_agglomerator = su.key_to_lifted_agglomerator(agglomerator_key) # TODO enable different multicut agglomerator agglomerator = su.key_to_agglomerator(agglomerator_key) # load the lifted edges and costs nh_key = 's%i/lifted_nh_%s' % (scale, lifted_prefix) lifted_costs_key = 's%i/lifted_costs_%s' % (scale, lifted_prefix) ds = problem[nh_key] fu.log("reading lifted uvs") ds.n_threads = n_threads lifted_uvs = ds[:] fu.log("reading lifted costs") ds = problem[lifted_costs_key] ds.n_threads = n_threads lifted_costs = ds[:] # the output group out = problem['s%i/sub_results_lmc' % scale] # NOTE we use different sub-graph identifiers for multicut and lifted multicut # in order to run both in the same n5-container. # However, for scale level 0 the sub-graphs come from the GraphWorkflow and # are hence identical sub_graph_identifier = 'sub_graphs' if scale == 0 else 'sub_graphs_lmc' block_prefix = os.path.join(problem_path, 's%i' % scale, sub_graph_identifier, 'block_') blocking = nt.blocking([0, 0, 0], shape, list(block_shape)) fu.log("start processsing %i blocks" % len(block_list)) with futures.ThreadPoolExecutor(n_threads) as tp: tasks = [ tp.submit(_solve_block_problem, block_id, graph, uv_ids, block_prefix, costs, lifted_uvs, lifted_costs, lifted_agglomerator, agglomerator, ignore_label, blocking, out, time_limit) for block_id in block_list ] [t.result() for t in tasks] fu.log_job_success(job_id)