def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    file_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(self.tmp_folder, 'map_edge_ids.py')
    util.copy_and_replace(os.path.join(file_dir, 'map_edge_ids.py'),
                          script_path)

    with open(self.config_path) as f:
        config = json.load(f)
        init_block_shape = config['block_shape']
        n_threads = config['n_threads']
        roi = config.get('roi', None)

    # make config for the job
    config = {'block_shape': init_block_shape,
              'n_threads': n_threads,
              'roi': roi}
    for scale in range(self.max_scale + 1):
        self._prepare_job(scale, config)

    # submit the jobs
    if self.run_local:
        # this only works in python 3 ?!
        with futures.ProcessPoolExecutor(self.max_scale + 1) as tp:
            tasks = [tp.submit(self._submit_job, scale, n_threads)
                     for scale in range(self.max_scale + 1)]
            [t.result() for t in tasks]
    else:
        for scale in range(self.max_scale + 1):
            self._submit_job(scale, n_threads)

    # wait till all jobs are finished
    if not self.run_local:
        util.wait_for_jobs('papec')

    # check for results
    processed_scales, times = self._collect_outputs(self.max_scale + 1)
    success = len(processed_scales) == self.max_scale + 1

    # write output file if we succeed, otherwise write partial
    # results to a different file and raise exception
    if success:
        with open(self.output().path, 'w') as f:
            json.dump({'times': times}, f)
    else:
        log_path = os.path.join(self.tmp_folder, 'map_edge_ids_partial.log')
        with open(log_path, 'w') as f:
            json.dump({'processed_scales': processed_scales,
                       'times': times}, f)
        raise RuntimeError("MapEdgesTask failed, %i / %i scales processed, "
                           % (len(processed_scales), self.max_scale + 1) +
                           "partial results serialized to %s" % log_path)
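# For reference, the JSON config consumed above only needs the keys read out
# of it; the values below are purely illustrative, not taken from the repo:
# {
#     "block_shape": [50, 512, 512],
#     "n_threads": 8,
#     "roi": null        # or [[z0, y0, x0], [z1, y1, x1]] as bounding box
# }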
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    script_path = os.path.join(self.tmp_folder, 'global_problem.py')
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'global_problem.py'),
                          script_path)

    with open(self.config_path) as f:
        config = json.load(f)
        n_threads = config['n_threads']
        # TODO support computation with roi
        roi = config.get('roi', None)

    # prepare the job config
    job_config = {'n_threads': n_threads}
    config_path = os.path.join(self.tmp_folder, 'global_problem_config.json')
    with open(config_path, 'w') as f:
        json.dump(job_config, f)

    command = '%s %s %s %i %s %s' % (script_path, self.path, self.out_key,
                                     self.max_scale, config_path,
                                     self.tmp_folder)
    log_file = os.path.join(self.tmp_folder, 'logs', 'log_global_problem')
    err_file = os.path.join(self.tmp_folder, 'error_logs',
                            'err_global_problem')
    bsub_command = ('bsub -n %i -J global_problem ' % n_threads +
                    '-We %i -o %s -e %s \'%s\'' % (self.time_estimate,
                                                   log_file, err_file,
                                                   command))
    if self.run_local:
        subprocess.call([command], shell=True)
    else:
        subprocess.call([bsub_command], shell=True)
        util.wait_for_jobs('papec')

    # check the output
    try:
        res_path = os.path.join(self.tmp_folder, 'global_problem.log')
        with open(res_path) as f:
            res = json.load(f)
            t = res['t']
        print("Global problem finished in", t, "s")
        success = True
    except Exception:
        success = False

    # clean up the intermediate output
    rmtree(os.path.join(self.path, self.out_key))

    # raise if the task failed
    if not success:
        raise RuntimeError("GlobalProblemTask failed")
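# For illustration (made-up paths), the rendered bsub command looks like:
#   bsub -n 8 -J global_problem -We 120 -o /tmp/logs/log_global_problem \
#        -e /tmp/error_logs/err_global_problem '/tmp/global_problem.py ...'
# i.e. the job is submitted with 8 slots (-n), a job name (-J), an estimated
# run time (-We) and redirected stdout/stderr (-o/-e); the actual command to
# execute is passed inside the single quotes.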
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    script_path = os.path.join(self.tmp_folder, 'merge_features.py')
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'merge_features.py'),
                          script_path)

    with open(self.config_path) as f:
        config = json.load(f)
        block_shape = config['block_shape']
        n_threads = config['n_threads']
        roi = config.get('roi', None)

    # write job config
    job_config = {'block_shape': block_shape,
                  'n_threads': n_threads,
                  'roi': roi}
    config_path = os.path.join(self.tmp_folder, 'merge_features_config.json')
    with open(config_path, 'w') as f:
        json.dump(job_config, f)

    # submit job
    command = '%s %s %s %s %s' % (script_path, self.graph_path, self.out_path,
                                  config_path, self.tmp_folder)
    log_file = os.path.join(self.tmp_folder, 'logs', 'log_merge_features')
    err_file = os.path.join(self.tmp_folder, 'error_logs',
                            'err_merge_features')
    bsub_command = 'bsub -n %i -J merge_features -We %i -o %s -e %s \'%s\'' % (
        n_threads, self.time_estimate, log_file, err_file, command)
    if self.run_local:
        subprocess.call([command], shell=True)
    else:
        subprocess.call([bsub_command], shell=True)
        util.wait_for_jobs('papec')

    # check the output; accessing 't' raises if the
    # result file is missing or incomplete
    try:
        with open(self.output().path) as f:
            json.load(f)['t']
        success = True
    except Exception:
        success = False

    if not success:
        raise RuntimeError("MergeFeaturesTask failed")
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    script_path = os.path.join(self.tmp_folder, 'make_costs.py')
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'make_costs.py'),
                          script_path)

    with open(self.config_path) as f:
        config = json.load(f)
        beta = config.get('beta', 0.5)
        weighting_exponent = config.get('weighting_exponent', 1.)
        weight_edges = config.get('weight_multicut_edges', False)

    # write job config
    job_config = {'beta': beta,
                  'weight_edges': weight_edges,
                  'weighting_exponent': weighting_exponent}
    config_path = os.path.join(self.tmp_folder, 'make_costs_config.json')
    with open(config_path, 'w') as f:
        json.dump(job_config, f)

    # submit job
    command = '%s %s %s %s %s %s' % (script_path, self.features_path,
                                     self.graph_path, self.out_path,
                                     config_path, self.tmp_folder)
    log_file = os.path.join(self.tmp_folder, 'logs', 'log_costs')
    err_file = os.path.join(self.tmp_folder, 'error_logs', 'err_costs')
    bsub_command = 'bsub -J costs -We %i -o %s -e %s \'%s\'' % (
        self.time_estimate, log_file, err_file, command)
    if self.run_local:
        subprocess.call([command], shell=True)
    else:
        subprocess.call([bsub_command], shell=True)
        util.wait_for_jobs('papec')

    # check the output; accessing 't' raises if the
    # result file is missing or incomplete
    try:
        with open(self.output().path) as f:
            json.load(f)['t']
        success = True
    except Exception:
        success = False

    if not success:
        raise RuntimeError("CostsTask failed")
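# A minimal sketch of the probability-to-cost transform a script like
# 'make_costs.py' typically applies. This is NOT taken from the repository;
# it only illustrates how 'beta', 'weighting_exponent' and 'weight_edges'
# are commonly used in multicut pipelines, and the function name is made up.
def _costs_sketch(probs, edge_sizes, beta=0.5,
                  weighting_exponent=1., weight_edges=False):
    import numpy as np
    # clip to avoid division by zero / infinite costs
    p = np.clip(probs, 1e-6, 1. - 1e-6)
    # log-ratio transform with boundary bias 'beta':
    # beta < 0.5 biases towards merging, beta > 0.5 towards splitting
    costs = np.log((1. - p) / p) + np.log((1. - beta) / beta)
    if weight_edges:
        # down-weight costs of small edges by their normalized size
        costs *= (edge_sizes / edge_sizes.max()) ** weighting_exponent
    return costs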
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'solve_subproblems.py'),
                          os.path.join(self.tmp_folder, 'solve_subproblems.py'))

    with open(self.config_path) as f:
        config = json.load(f)
        initial_block_shape = config['block_shape']
        n_threads = config['n_threads']
        roi = config.get('roi', None)

    # get number of blocks
    factor = 2**self.scale
    block_shape = [factor * bs for bs in initial_block_shape]
    shape = z5py.File(self.graph_path).attrs['shape']
    blocking = nifty.tools.blocking([0, 0, 0], shape, block_shape)

    # check if we have a roi and adjust the block list if we do
    if roi is None:
        n_blocks = blocking.numberOfBlocks
        block_list = list(range(n_blocks))
    else:
        block_list = blocking.getBlockIdsOverlappingBoundingBox(roi[0], roi[1],
                                                                [0, 0, 0]).tolist()
        n_blocks = len(block_list)

    # find the actual number of jobs and prepare job configs
    n_jobs = min(n_blocks, self.max_jobs)
    self._prepare_jobs(n_jobs, block_list, initial_block_shape, n_threads)

    # submit the jobs
    if self.run_local:
        # this only works in python 3 ?!
        with futures.ProcessPoolExecutor(n_jobs) as tp:
            tasks = [tp.submit(self._submit_job, job_id, n_threads)
                     for job_id in range(n_jobs)]
            [t.result() for t in tasks]
    else:
        for job_id in range(n_jobs):
            self._submit_job(job_id, n_threads)

    # wait till all jobs are finished
    if not self.run_local:
        util.wait_for_jobs('papec')

    # check the job outputs
    processed_blocks, times = self._collect_outputs(block_list)
    assert len(processed_blocks) == len(times)
    success = len(processed_blocks) == n_blocks

    # write output file if we succeed, otherwise write partial
    # results to a different file and raise exception
    if success:
        out = self.output()
        # TODO does 'out' support a with block?
        fres = out.open('w')
        json.dump({'times': times}, fres)
        fres.close()
    else:
        log_path = os.path.join(self.tmp_folder,
                                'solve_subproblems_s%i_partial.json' % self.scale)
        with open(log_path, 'w') as out:
            json.dump({'times': times,
                       'processed_blocks': processed_blocks}, out)
        raise RuntimeError("SolveSubproblemTask failed, "
                           "%i / %i blocks processed, "
                           "serialized partial results to %s"
                           % (len(processed_blocks), n_blocks, log_path))
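# Illustrative example of the blocking logic above (values are made up):
# with shape = [100, 1024, 1024] and initial block_shape = [50, 512, 512],
# scale 0 yields 2 * 2 * 2 = 8 blocks; at scale 1 the block shape doubles to
# [100, 1024, 1024], so the whole volume becomes a single block. A roi of
# ((0, 0, 0), (50, 512, 512)) at scale 0 would restrict block_list to the
# blocks overlapping that bounding box, here just block 0.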
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'initial_features.py'),
                          os.path.join(self.tmp_folder, 'initial_features.py'))

    with open(self.config_path) as f:
        config = json.load(f)
        block_shape = config['block_shape']
        offsets = config['affinity_offsets']
        roi = config.get('roi', None)

    # hardcoded keys
    graph_key = 'graph'
    out_key = 'features'

    # create the output files
    f_graph = z5py.File(self.graph_path, use_zarr_format=False)
    shape = f_graph.attrs['shape']
    ds_graph = f_graph[graph_key]
    n_edges = ds_graph.attrs['numberOfEdges']

    f_out = z5py.File(self.out_path, use_zarr_format=False)
    f_out.require_group('blocks')
    # chunk size = 64**3; chunked along the edge axis only, so each of the
    # 10 feature channels can be written as an independent column
    chunk_size = min(262144, n_edges)
    f_out.require_dataset(out_key, dtype='float64', shape=(n_edges, 10),
                          chunks=(chunk_size, 1), compression='gzip')

    # get number of blocks
    blocking = nifty.tools.blocking([0, 0, 0], shape, block_shape)
    # check if we have a roi and adjust the block list if we do
    if roi is None:
        n_blocks = blocking.numberOfBlocks
        block_list = list(range(n_blocks))
    else:
        block_list = blocking.getBlockIdsOverlappingBoundingBox(roi[0], roi[1],
                                                                [0, 0, 0]).tolist()
        n_blocks = len(block_list)

    # find the actual number of jobs and prepare job configs
    n_jobs = min(n_blocks, self.max_jobs)
    self._prepare_jobs(n_jobs, block_list, offsets)

    # submit the jobs
    if self.run_local:
        # this only works in python 3 ?!
        with futures.ProcessPoolExecutor(n_jobs) as tp:
            tasks = [tp.submit(self._submit_job, job_id)
                     for job_id in range(n_jobs)]
            [t.result() for t in tasks]
    else:
        for job_id in range(n_jobs):
            self._submit_job(job_id)

    # wait till all jobs are finished
    if not self.run_local:
        util.wait_for_jobs('papec')

    # check the job outputs
    processed_jobs, times = self._collect_outputs(n_jobs)
    assert len(processed_jobs) == len(times)
    success = len(processed_jobs) == n_jobs

    # write output file if we succeed, otherwise write partial
    # results to a different file and raise exception
    if success:
        out = self.output()
        # TODO does 'out' support a with block?
        fres = out.open('w')
        json.dump({'times': times}, fres)
        fres.close()
    else:
        log_path = os.path.join(self.tmp_folder, 'initial_features_partial.json')
        with open(log_path, 'w') as out:
            json.dump({'times': times,
                       'processed_jobs': processed_jobs}, out)
        raise RuntimeError("InitialFeatureTask failed, %i / %i jobs processed, "
                           % (len(processed_jobs), n_jobs) +
                           "serialized partial results to %s" % log_path)
def run(self):
    from production import util

    # copy the script to the temp folder and replace the shebang
    script_path = os.path.join(self.tmp_folder, 'reduce_problem.py')
    file_dir = os.path.dirname(os.path.abspath(__file__))
    util.copy_and_replace(os.path.join(file_dir, 'reduce_problem.py'),
                          script_path)

    with open(self.config_path) as f:
        config = json.load(f)
        block_shape = config['block_shape']
        n_threads = config['n_threads']
        roi = config.get('roi', None)

    # prepare the job config
    job_config = {'block_shape': block_shape,
                  'n_threads': n_threads,
                  'roi': roi}
    config_path = os.path.join(self.tmp_folder,
                               'reduce_problem_config_s%i.json' % self.scale)
    with open(config_path, 'w') as f:
        json.dump(job_config, f)

    command = '%s %s %s %i %s %s' % (script_path, self.graph_path,
                                     self.costs_path, self.scale,
                                     config_path, self.tmp_folder)
    log_file = os.path.join(self.tmp_folder, 'logs',
                            'log_reduce_problem_s%i' % self.scale)
    err_file = os.path.join(self.tmp_folder, 'error_logs',
                            'err_reduce_problem_s%i.err' % self.scale)
    bsub_command = ('bsub -n %i -J reduce_problem ' % n_threads +
                    '-We %i -o %s -e %s \'%s\'' % (self.time_estimate,
                                                   log_file, err_file,
                                                   command))
    if self.run_local:
        subprocess.call([command], shell=True)
    else:
        subprocess.call([bsub_command], shell=True)
        util.wait_for_jobs('papec')

    ds_graph = z5py.File(self.graph_path)['graph']
    nodes = ds_graph.attrs['numberOfNodes']
    edges = ds_graph.attrs['numberOfEdges']

    # check the output
    try:
        with open(self.output().path) as f:
            res = json.load(f)
            t = res['t']
            new_nodes = res['new_nodes']
            new_edges = res['new_edges']
        print("Reduce problem finished in", t, "s")
        print("Reduced number of nodes from", nodes, "to", new_nodes)
        print("Reduced number of edges from", edges, "to", new_edges)
        success = True
    except Exception:
        success = False

    # raise if the task failed
    if not success:
        raise RuntimeError("ReduceProblemTask failed")