def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = self.get_task_config()
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'offsets_path': self.offsets_path,
                   'block_shape': block_shape,
                   'tmp_folder': self.tmp_folder})

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # get the number of graph edges and the volume shape
    with vu.file_reader(self.graph_path, 'r') as f:
        g = f[self.graph_key]
        shape = tuple(g.attrs['shape'])
        n_edges = g.attrs['numberOfEdges']
    self._write_log("Merging edge features for %i edges" % n_edges)

    # if we don't have a roi, we only serialize the number of blocks
    # otherwise we serialize the blocks in roi
    if roi_begin is None:
        block_ids = nt.blocking([0, 0, 0], shape, block_shape).numberOfBlocks
        self._write_log("Merging edge features for %i blocks" % block_ids)
    else:
        block_ids = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        self._write_log("Merging edge features for %i blocks" % len(block_ids))

    subfeat_key = 's0/sub_features'
    subgraph_key = 's0/sub_graphs'
    with vu.file_reader(self.output_path, 'r') as f:
        n_features = f[subfeat_key].attrs['n_features']

    # require the output dataset
    # chunk size = 64**3
    chunk_size = min(262144, n_edges)
    with vu.file_reader(self.output_path) as f:
        feat_shape = (n_edges, n_features)
        feat_chunks = (chunk_size, 1)
        f.require_dataset(self.output_key, dtype='float64', shape=feat_shape,
                          chunks=feat_chunks, compression='gzip')

    # update the task config
    config.update({'graph_path': self.graph_path, 'subgraph_key': subgraph_key,
                   'in_path': self.output_path, 'subfeat_key': subfeat_key,
                   'output_path': self.output_path, 'output_key': self.output_key,
                   'edge_chunk_size': chunk_size, 'block_ids': block_ids,
                   'n_edges': n_edges})

    edge_block_list = vu.blocks_in_volume([n_edges], [chunk_size])
    n_jobs = min(len(edge_block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, edge_block_list, config, consecutive_blocks=True)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = self.get_task_config()
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'block_shape': block_shape,
                   'tmp_folder': self.tmp_folder})

    # make output dataset
    chunks = config.pop('chunks', None)
    if chunks is None:
        chunks = tuple(bs // 2 for bs in block_shape)
    compression = config.pop('compression', 'gzip')
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, dtype='uint64',
                          compression=compression, chunks=chunks)

    if self.threshold is not None:
        config.update({'threshold': self.threshold})

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # we don't need any additional config besides the paths
    config = self.get_task_config()
    config.update({"ws_path": self.ws_path,
                   "ws_key": self.ws_key,
                   "graph_block_prefix": self.graph_block_prefix,
                   "block_shape": block_shape,
                   "tmp_folder": self.tmp_folder})

    shape = vu.get_shape(self.ws_path, self.ws_key)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    with vu.file_reader(self.graph_path) as f:
        shape = f.attrs['shape']

    factor = 2**self.scale
    block_shape = tuple(sh * factor for sh in block_shape)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)

    # update the config with the graph path, scale and input key
    config.update({'graph_path': self.graph_path,
                   'scale': self.scale,
                   'input_key': self.input_key})

    # prime and run the job
    self.prepare_jobs(1, block_list, config)
    self.submit_jobs(1)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(1)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    if len(shape) == 4:
        shape = shape[1:]

    # load the watershed config
    ws_config = self.get_task_config()

    # require output dataset
    # TODO read chunks from config
    chunks = tuple(bs // 2 for bs in block_shape)
    chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, chunks=chunks,
                          compression='gzip', dtype='uint64')

    # update the config with input and output paths and keys
    # as well as block shape
    ws_config.update({'input_path': self.input_path,
                      'input_key': self.input_key,
                      'seeds_path': self.seeds_path,
                      'seeds_key': self.seeds_key,
                      'output_path': self.output_path,
                      'output_key': self.output_key,
                      'block_shape': block_shape})
    if self.mask_path != '':
        assert self.mask_key != ''
        ws_config.update({'mask_path': self.mask_path,
                          'mask_key': self.mask_key})

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)
    self._write_log('scheduling %i blocks to be processed' % len(block_list))

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, ws_config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)

    # load the create_multiset config
    config = self.get_task_config()
    compression = config.get('compression', 'gzip')

    # require output dataset
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, chunks=tuple(block_shape),
                          compression=compression, dtype='uint8')

    # update the config with input and output paths and keys
    # as well as block shape
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'block_shape': block_shape})

    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    self._write_log('scheduling %i blocks to be processed' % len(block_list))
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    with vu.file_reader(self.problem_path, 'r') as f:
        shape = f[self.graph_key].attrs['shape']
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = self.get_task_config()
    tmp_file = os.path.join(self.tmp_folder, 'stitch_edges.n5')
    config.update({'input_path': tmp_file,
                   'problem_path': self.problem_path,
                   'features_key': self.features_key,
                   'graph_key': self.graph_key,
                   'assignments_path': self.assignments_path,
                   'assignments_key': self.assignments_key,
                   'edge_size_threshold': self.edge_size_threshold,
                   'serialize_edges': self.serialize_edges,
                   'n_jobs': n_jobs})

    with vu.file_reader(tmp_file) as f:
        f.require_group('job_results')

    # we only have a single job to find the labeling
    self.prepare_jobs(1, None, config)
    self.submit_jobs(1)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(1)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # update the config with the input path and key as well as the block shape
    config.update({'path': self.path,
                   'key': self.key,
                   'tmp_folder': self.tmp_folder,
                   'block_shape': block_shape})

    with vu.file_reader(self.path, 'r') as f:
        shape = f[self.key].shape

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang = self.global_config_values()[0]
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    out_shape = (self.number_of_labels, 11)
    out_chunks = (min(self.number_of_labels, 100000), 11)
    block_list = vu.blocks_in_volume([out_shape[0]], [out_chunks[0]])

    # create output dataset
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=out_shape, chunks=out_chunks,
                          compression='gzip', dtype='float64')

    # update the config with the input and output paths and keys
    # as well as the output shape and chunks
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'out_shape': out_shape,
                   'out_chunks': out_chunks})

    # prime and run the jobs
    self.prepare_jobs(self.max_jobs, block_list, config, self.prefix)
    self.submit_jobs(self.max_jobs, self.prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs(self.prefix)
    self.check_jobs(self.max_jobs, self.prefix)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    chunk_size = min(10000, self.number_of_labels)

    # require the output dataset
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, dtype='float32',
                          shape=(self.number_of_labels,),
                          chunks=(chunk_size,), compression='gzip')

    # temporary output dataset
    tmp_path = os.path.join(self.tmp_folder, 'region_features_tmp.n5')
    tmp_key = 'block_feats'

    # update the task config
    config.update({'output_path': self.output_path, 'output_key': self.output_key,
                   'tmp_path': tmp_path, 'tmp_key': tmp_key,
                   'node_chunk_size': chunk_size})

    node_block_list = vu.blocks_in_volume([self.number_of_labels], [chunk_size])
    n_jobs = min(len(node_block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, node_block_list, config, consecutive_blocks=True)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    config = self.get_task_config()
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'filter_name': self.filter_name,
                   'sigma': self.sigma,
                   'halo': self.halo,
                   'block_shape': block_shape})

    # get shape and require the output dataset
    shape = vu.get_shape(self.input_path, self.input_key)
    chunks = tuple(min(bs // 2, sh) for bs, sh in zip(block_shape, shape))
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, dtype='float32',
                          compression='gzip', chunks=chunks)

    if self.n_retries == 0:
        # make the block config
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # get shape and check dimension and channel param
    shape = vu.get_shape(self.input_path, self.input_key)
    if len(shape) == 4 and self.channel is None:
        raise RuntimeError("Got 4d input, but channel was not specified")
    if len(shape) == 4 and self.channel >= shape[0]:
        raise RuntimeError("Channel %i is too large for n-channels %i" % (self.channel, shape[0]))
    if len(shape) == 3 and self.channel is not None:
        raise RuntimeError("Channel was specified, but input is only 3d")
    if len(shape) == 4:
        shape = shape[1:]

    # temporary output dataset
    output_path = os.path.join(self.tmp_folder, 'region_features_tmp.n5')
    output_key = 'block_feats'

    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'labels_path': self.labels_path,
                   'labels_key': self.labels_key,
                   'output_path': output_path,
                   'output_key': output_key,
                   'block_shape': block_shape,
                   'channel': self.channel})

    # require the temporary output data-set
    f_out = z5py.File(output_path)
    f_out.require_dataset(output_key, shape=shape, compression='gzip',
                          chunks=tuple(block_shape), dtype='float32')

    if self.n_retries == 0:
        # make the block config
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config, self.prefix)
    self.submit_jobs(n_jobs, self.prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs, self.prefix)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = self.get_task_config()
    config.update({'shape': self.shape,
                   'offsets_path': self.offsets_path,
                   'overlap_prefix': self.overlap_prefix,
                   'save_prefix': self.save_prefix,
                   'overlap_threshold': self.overlap_threshold,
                   'block_shape': block_shape,
                   'tmp_folder': self.tmp_folder,
                   'halo': self.halo})

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang = self.global_config_values()[0]
    self.init(shebang)

    # load the task config
    config = self.get_task_config()
    if self.compute_cell_features:
        config = self._update_config_for_cells(config)
    else:
        config = self._update_config_for_nuclei(config)

    # TODO match block size and number of blocks
    # we hard-code the chunk-size to 1000 for now
    number_of_labels = self._get_number_of_labels()
    block_len = self._compute_block_len(number_of_labels)
    block_list = vu.blocks_in_volume([number_of_labels], [block_len])
    config.update({'block_len': block_len,
                   'compute_cell_features': self.compute_cell_features,
                   'number_of_labels': number_of_labels})

    prefix = 'cells' if self.compute_cell_features else 'nuclei'

    # prime and run the jobs
    n_jobs = min(len(block_list), self.max_jobs)
    self.prepare_jobs(n_jobs, block_list, config, prefix)
    self.submit_jobs(n_jobs, prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs, prefix)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    if len(shape) == 4:
        shape = shape[1:]

    # load the agglomerate config
    config = self.get_task_config()

    # update the config with input and output paths and keys
    # as well as block shape
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'block_shape': block_shape,
                   'have_ignore_label': self.have_ignore_label})

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    self._write_log('scheduling %i blocks to be processed' % len(block_list))
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def debug_vol():
    path = '../data.n5'
    key = 'volumes/cilia/segmentation'
    f = open_file(path)
    ds = f[key]
    shape = ds.shape
    block_shape = ds.chunks

    roi_begin = [7216, 12288, 7488]
    roi_end = [8640, 19040, 11392]
    blocks, blocking = blocks_in_volume(shape, block_shape, roi_begin, roi_end,
                                        return_blocking=True)
    print("Have", len(blocks), "blocks in roi")

    # check reading all blocks
    for block_id in blocks:
        print("Check block", block_id)
        block = blocking.getBlock(block_id)
        bb = block_to_bb(block)
        d = ds[bb]
        print("Have block", block_id)
    print("All checks passed")
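# NOTE: the snippets in this file all partition a volume into blocks via
# blocks_in_volume / vu.blocks_in_volume and then read or write per-block
# bounding boxes. The sketch below illustrates that pattern with nifty.tools
# (imported as nt, as in the merge-features snippet above); the helper names
# blocks_in_roi and block_to_slicing are illustrative assumptions, not part
# of the library.
import nifty.tools as nt


def block_to_slicing(block):
    # convert a nifty block into a numpy slicing tuple
    return tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))


def blocks_in_roi(shape, block_shape, roi_begin, roi_end):
    # partition the full volume into blocks of size block_shape
    blocking = nt.blocking([0] * len(shape), list(shape), list(block_shape))
    block_ids = []
    for block_id in range(blocking.numberOfBlocks):
        block = blocking.getBlock(block_id)
        # keep the block if it overlaps the roi in every dimension
        if all(beg < re and end > rb
               for beg, end, rb, re in zip(block.begin, block.end, roi_begin, roi_end)):
            block_ids.append(block_id)
    return block_ids, blocking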
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    assert len(shape) == 4, "Need 4d input for MWS"
    n_channels = shape[0]
    shape = shape[1:]

    # TODO make optional which channels to choose
    assert len(self.offsets) == n_channels,\
        "%i, %i" % (len(self.offsets), n_channels)
    assert all(len(off) == 3 for off in self.offsets)

    config = self.get_task_config()
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'block_shape': block_shape,
                   'offsets': self.offsets,
                   'halo': self.halo})

    # check if we have a mask and add to the config if we do
    if self.mask_path != '':
        assert self.mask_key != ''
        config.update({'mask_path': self.mask_path,
                       'mask_key': self.mask_key})

    # get chunks
    chunks = config.pop('chunks', None)
    if chunks is None:
        chunks = tuple(bs // 2 for bs in block_shape)
    # clip chunks
    chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))

    # make output dataset
    compression = config.pop('compression', 'gzip')
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, dtype='uint64',
                          compression=compression, chunks=chunks)

    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    shape = vu.get_shape(self.labels_path, self.labels_key)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    graph_key = 's0/graph'
    with vu.file_reader(self.graph_path, 'r') as f:
        n_edges = f[graph_key].attrs['numberOfEdges']

    config = self.get_task_config()
    tmp_file = os.path.join(self.tmp_folder, 'stitch_edges.n5')
    config.update({'out_path': tmp_file,
                   'graph_path': self.graph_path,
                   'labels_path': self.labels_path,
                   'labels_key': self.labels_key,
                   'n_edges': n_edges,
                   'block_shape': block_shape})

    with vu.file_reader(tmp_file) as f:
        f.require_group('job_results')

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = self.get_task_config()
    config.update({'output_path': self.output_path,
                   'output_key': self.output_key,
                   'tmp_folder': self.tmp_folder,
                   'n_jobs': n_jobs,
                   'number_of_labels': int(self.number_of_labels)})

    # we only have a single job to find the labeling
    self.prepare_jobs(1, None, config)
    self.submit_jobs(1)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(1)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # we don't need any additional config besides the paths
    config = {"input_path": self.input_path,
              "input_key": self.input_key,
              "block_shape": block_shape,
              "tmp_folder": self.tmp_folder,
              "return_counts": self.return_counts}

    self._write_log('scheduling %i blocks to be processed' % len(block_list))

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)

    # load the downscale_multiset config
    config = self.get_task_config()
    compression = config.get('compression', 'gzip')

    out_shape = downscale_shape(shape, self.scale_factor)

    # require output dataset
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=out_shape, chunks=tuple(block_shape),
                          compression=compression, dtype='uint8')

    # update the config with input and output paths and keys
    # as well as block shape
    config.update({'input_path': self.input_path,
                   'input_key': self.input_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'scale_factor': self.scale_factor,
                   'restrict_set': self.restrict_set,
                   'effective_scale_factor': self.effective_scale_factor,
                   'block_shape': block_shape})

    # if we have a roi, we need to adjust it given the effective scaling factor
    if roi_begin is not None:
        roi_begin = [rb // eff for rb, eff in zip(roi_begin, self.effective_scale_factor)]
        roi_end = [re // eff for re, eff in zip(roi_end, self.effective_scale_factor)]

    block_list = vu.blocks_in_volume(out_shape, block_shape, roi_begin, roi_end)
    self._write_log('scheduling %i blocks to be processed' % len(block_list))
    n_jobs = min(len(block_list), self.max_jobs)
    self._write_log("submitting %i blocks with %i jobs" % (len(block_list), n_jobs))

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config, self.scale_prefix)
    self.submit_jobs(n_jobs, self.scale_prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs, self.scale_prefix)
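# NOTE: downscale_shape is assumed here to compute the output shape of a
# blockwise downscaling step. A minimal sketch under that assumption, using
# ceiling division so that partial blocks at the volume border are kept:
def downscale_shape(shape, scale_factor):
    # scale_factor may be a single int or one factor per axis
    if isinstance(scale_factor, int):
        scale_factor = len(shape) * [scale_factor]
    return tuple((sh + sf - 1) // sf for sh, sf in zip(shape, scale_factor))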
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    # update the config with the output path, tmp folder and number of jobs
    config.update({'output_path': self.output_path,
                   'tmp_folder': self.tmp_folder,
                   'n_jobs': n_jobs})

    # prime and run the jobs
    self.prepare_jobs(1, None, config)
    self.submit_jobs(1)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(1)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)
    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    config = {'input_path': self.input_path,
              'input_key': self.input_key,
              'tmp_folder': self.tmp_folder,
              'n_jobs': n_jobs,
              'size_threshold': self.size_threshold}

    # we only have a single job to find the labeling
    self.prepare_jobs(1, None, config)
    self.submit_jobs(1)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()

    # log the save-path
    save_path = os.path.join(self.tmp_folder, 'discard_ids.npy')
    self._write_log("saving results to %s" % save_path)
    self.check_jobs(1)
def run_impl(self):
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    config = self.get_task_config()
    config.update({'affinity_path': self.affinity_path,
                   'affinity_key': self.affinity_key,
                   'objects_path': self.objects_path,
                   'objects_key': self.objects_key,
                   'offsets': self.offsets,
                   'block_shape': block_shape})

    shape = vu.get_shape(self.affinity_path, self.affinity_key)[1:]
    chunks = vu.file_reader(self.affinity_path)[self.affinity_key].chunks[1:]
    assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks))

    block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    with vu.file_reader(self.features_path) as f:
        feat_shape = f[self.features_key].shape
    n_edges = feat_shape[0]

    # chunk size = 64**3
    chunk_size = min(262144, n_edges)

    # require output datasets
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=(n_edges,), compression='gzip',
                          dtype='float32', chunks=(chunk_size,))
        f.require_dataset(self.output_labels_key, shape=(n_edges,), compression='gzip',
                          dtype='uint32', chunks=(chunk_size,))

    # update the config with input and output paths and keys
    # as well as block shape
    config.update({'rf_path': self.rf_path,
                   'features_path': self.features_path,
                   'features_key': self.features_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'output_labels_key': self.output_labels_key,
                   'chunk_size': chunk_size,
                   'n_edges': n_edges,
                   'edge_classes': self.edge_classes})

    if self.n_retries == 0:
        edge_block_list = vu.blocks_in_volume([n_edges], [chunk_size])
    else:
        edge_block_list = self.block_list
        self.clean_up_for_retry(edge_block_list)

    n_jobs = min(len(edge_block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, edge_block_list, config, consecutive_blocks=True)
    self.submit_jobs(n_jobs)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, global_roi_begin, global_roi_end = self.global_config_values()
    self.init(shebang)

    assert (self.roi_begin is None) == (self.roi_end is None),\
        "Either both or neither of `roi_begin` and `roi_end` must be specified"

    # if we don't have a task-specific roi, use the global roi
    if self.roi_begin is None:
        roi_begin = global_roi_begin
        roi_end = global_roi_end
    else:
        # otherwise use the task-specific roi
        roi_begin = self.roi_begin
        roi_end = self.roi_end
        # if we also have a global roi, check that the task-specific roi
        # is contained in the global roi
        if global_roi_begin is not None:
            assert all(rb >= grb for rb, grb in zip(roi_begin, global_roi_begin))
        if global_roi_end is not None:
            assert all(re <= gre for re, gre in zip(roi_end, global_roi_end))

    # read shape
    with vu.file_reader(self.problem_path, 'r') as f:
        shape = tuple(f.attrs['shape'])

    # make output dataset
    with vu.file_reader(self.output_path) as f:
        f.require_dataset(self.output_key, shape=shape, dtype='uint64',
                          chunks=(25, 256, 256), compression='gzip')

    factor = 2**self.scale
    block_shape = tuple(bs * factor for bs in block_shape)

    # update the config with input and graph paths and keys
    # as well as block shape
    config = self.get_task_config()
    config.update({'problem_path': self.problem_path,
                   'scale': self.scale,
                   'block_shape': block_shape,
                   'ws_path': self.ws_path,
                   'ws_key': self.ws_key,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'sub_graph_identifier': self.sub_graph_identifier,
                   'sub_result_identifier': self.sub_result_identifier})

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    # prime and run the jobs
    prefix = 's%i' % self.scale
    self.prepare_jobs(1, block_list, config, prefix)
    self.submit_jobs(1, prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(1, prefix)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end, block_list_path\
        = self.global_config_values(with_block_list_path=True)
    self.init(shebang)

    with vu.file_reader(self.problem_path, 'r') as f:
        shape = tuple(f.attrs['shape'])

    factor = 2**self.scale
    block_shape = tuple(bs * factor for bs in block_shape)

    # update the config with input and graph paths and keys
    # as well as block shape
    config = self.get_task_config()
    config.update({'problem_path': self.problem_path,
                   'scale': self.scale,
                   'block_shape': block_shape,
                   'lifted_prefix': self.lifted_prefix})

    # make output datasets
    out_key = 's%i/sub_results_lmc' % self.scale
    with vu.file_reader(self.problem_path) as f:
        out = f.require_group(out_key)
        # NOTE, gzip may fail for very small inputs, so we use raw compression for now
        # might be a good idea to give blosc a shot ...
        out.require_dataset('cut_edge_ids', shape=shape, chunks=block_shape,
                            compression='raw', dtype='uint64')
        out.require_dataset('node_result', shape=shape, chunks=block_shape,
                            compression='raw', dtype='uint64')

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end,
                                         block_list_path)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    prefix = 's%i' % self.scale
    self.prepare_jobs(n_jobs, block_list, config, prefix)
    self.submit_jobs(n_jobs, prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs, prefix)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # load the task config
    config = self.get_task_config()

    # update the config with input and graph paths and keys
    # as well as block shape
    config.update({'ws_path': self.ws_path,
                   'ws_key': self.ws_key,
                   'input_path': self.input_path,
                   'input_key': self.input_key,
                   'block_shape': block_shape,
                   'output_path': self.output_path,
                   'output_key': self.output_key,
                   'ignore_label': self.ignore_label})

    shape = vu.get_shape(self.ws_path, self.ws_key)
    chunks = tuple(min(bs, sh) for bs, sh in zip(block_shape, shape))

    try:
        max_id = vu.file_reader(self.ws_path, 'r')[self.ws_key].attrs['maxId']
    except KeyError:
        raise KeyError("Dataset %s:%s does not have attribute maxId" % (self.ws_path, self.ws_key))

    # create output dataset
    with vu.file_reader(self.output_path) as f:
        ds_out = f.require_dataset(self.output_key, shape=shape, dtype='uint64',
                                   chunks=chunks, compression='gzip')
        # need to serialize the label max-id here for
        # the merge_node_labels task
        ds_out.attrs['maxId'] = int(max_id)

    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list)

    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config, self.prefix)
    self.submit_jobs(n_jobs, self.prefix)

    # wait till jobs finish and check for job success
    self.wait_for_jobs()
    self.check_jobs(n_jobs, self.prefix)
def run_impl(self):
    # get the global config and init configs
    shebang, block_shape, roi_begin, roi_end = self.global_config_values()
    self.init(shebang)

    # get shape and make block config
    shape = vu.get_shape(self.input_path, self.input_key)

    # require output dataset
    chunks = tuple(bs // 2 if bs % 2 == 0 else bs for bs in block_shape)
    chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))
    with vu.file_reader(self.output_path) as f:
        if self.output_key in f:
            chunks = f[self.output_key].chunks
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks)),\
            "%s, %s" % (str(block_shape), str(chunks))
        f.require_dataset(self.output_key, shape=shape, chunks=chunks,
                          compression='gzip', dtype='uint64')

    n_threads = self.get_task_config().get('threads_per_core', 1)

    # check if input and output datasets are identical
    in_place = (self.input_path == self.output_path) and (self.input_key == self.output_key)

    if self.assignment_key is None:
        assert os.path.splitext(self.assignment_path)[-1] == '.pkl',\
            "Assignments need to be a pickled map if no key is given"

    # update the config with input and output paths and keys
    # as well as block shape
    config = {'input_path': self.input_path,
              'input_key': self.input_key,
              'block_shape': block_shape,
              'n_threads': n_threads,
              'assignment_path': self.assignment_path,
              'assignment_key': self.assignment_key}
    if self.offset_path != '':
        config.update({'offset_path': self.offset_path})

    # we only add output path and key if we do not write in place
    if not in_place:
        config.update({'output_path': self.output_path,
                       'output_key': self.output_key})

    # get block list and jobs
    if self.n_retries == 0:
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
    else:
        block_list = self.block_list
        self.clean_up_for_retry(block_list, self.identifier)

    self._write_log('scheduling %i blocks to be processed' % len(block_list))
    n_jobs = min(len(block_list), self.max_jobs)

    # prime and run the jobs
    self.prepare_jobs(n_jobs, block_list, config, self.identifier)
    self.submit_jobs(n_jobs, self.identifier)

    # wait till jobs finish and check for job success
    self.wait_for_jobs(self.identifier)
    self.check_jobs(n_jobs, self.identifier)