Example #1
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = self.get_task_config()
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'offsets_path': self.offsets_path,
            'block_shape': block_shape,
            'tmp_folder': self.tmp_folder
        })

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
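All of these snippets are variations of one scaffold. The sketch below distills it; it assumes the same base-class helpers and vu volume utilities the examples themselves call, and the attributes on self stand in for whatever paths and keys a concrete task defines.

    # A distilled sketch of the shared scaffold, not a definitive implementation.
    def run_impl(self):
        # read the cluster-wide settings and initialize the task
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # derive the blocks to process from the volume shape (restricted to the roi, if any)
        shape = vu.get_shape(self.input_path, self.input_key)
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # pass the task-specific parameters on to the workers
        config = self.get_task_config()
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'block_shape': block_shape})

        # prime, run and validate the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)
        self.wait_for_jobs()
        self.check_jobs(n_jobs)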
Example #2
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # get the number of graph edges and the volume shape
        with vu.file_reader(self.graph_path, 'r') as f:
            g = f[self.graph_key]
            shape = tuple(g.attrs['shape'])
            n_edges = g.attrs['numberOfEdges']
        self._write_log("Merging edge features for %i edges" % n_edges)

        # if we don't have a roi, we only serialize the number of blocks
        # otherwise we serialize the blocks in roi
        if roi_begin is None:
            block_ids = nt.blocking([0, 0, 0], shape, block_shape).numberOfBlocks
            self._write_log("Merging edge features for %i blocks" % block_ids)
        else:
            block_ids = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
            self._write_log("Merging edge features for %i blocks" % len(block_ids))

        subfeat_key = 's0/sub_features'
        subgraph_key = 's0/sub_graphs'
        with vu.file_reader(self.output_path, 'r') as f:
            n_features = f[subfeat_key].attrs['n_features']

        # require the output dataset
        chunk_size = min(262144, n_edges)  # chunk size = 64**3
        with vu.file_reader(self.output_path) as f:
            feat_shape = (n_edges, n_features)
            feat_chunks = (chunk_size, 1)
            f.require_dataset(self.output_key, dtype='float64', shape=feat_shape,
                              chunks=feat_chunks, compression='gzip')

        # update the task config
        config.update({'graph_path': self.graph_path, 'subgraph_key': subgraph_key,
                       'in_path': self.output_path, 'subfeat_key': subfeat_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'edge_chunk_size': chunk_size, 'block_ids': block_ids,
                       'n_edges': n_edges})

        edge_block_list = vu.blocks_in_volume([n_edges], [chunk_size])

        n_jobs = min(len(edge_block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, edge_block_list, config,
                          consecutive_blocks=True)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
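Example #2 also uses vu.blocks_in_volume on a 1d range to chunk the edges. Assuming it tiles the range by ceil-division (consistent with its use throughout these examples), the resulting block count can be checked with plain arithmetic:

import math

def n_blocks_1d(n_items, chunk_size):
    # ceil division: the number of 1d blocks needed to cover n_items
    return math.ceil(n_items / chunk_size)

# 262144 = 64**3, the edge chunk size used above
assert n_blocks_1d(1000000, 262144) == 4
assert n_blocks_1d(262144, 262144) == 1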
Example #3
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = self.get_task_config()
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'tmp_folder': self.tmp_folder
        })
        # make output dataset
        chunks = config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              dtype='uint64',
                              compression=compression,
                              chunks=chunks)

        if self.threshold is not None:
            config.update({'threshold': self.threshold})

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
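The require_dataset calls here are what make reruns idempotent: the dataset is created on the first run and re-opened (with its metadata checked) on subsequent runs. A minimal sketch of that behavior, assuming vu.file_reader returns a z5py/h5py-style file handle for the given path (the path itself is hypothetical):

with vu.file_reader('/tmp/example.n5') as f:
    # created on the first call, re-opened on later calls
    ds = f.require_dataset('seg', shape=(50, 512, 512), chunks=(25, 256, 256),
                           compression='gzip', dtype='uint64')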
Example #4
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # we don't need any additional config besides the paths
        config = self.get_task_config()
        config.update({
            "ws_path": self.ws_path,
            "ws_key": self.ws_key,
            "graph_block_prefix": self.graph_block_prefix,
            "block_shape": block_shape,
            "tmp_folder": self.tmp_folder
        })
        shape = vu.get_shape(self.ws_path, self.ws_key)
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #5
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the watershed config
        config = self.get_task_config()

        with vu.file_reader(self.graph_path) as f:
            shape = f.attrs['shape']

        factor = 2**self.scale
        block_shape = tuple(sh * factor for sh in block_shape)
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)

        # update the config with the graph path, scale and input key
        config.update({
            'graph_path': self.graph_path,
            'scale': self.scale,
            'input_key': self.input_key
        })

        # prime and run the job
        self.prepare_jobs(1, block_list, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1)
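The scale handling above enlarges the blocks by a factor of two per scale level, so a single block at scale s covers 2**s scale-0 blocks along each axis. A quick check with hypothetical numbers:

scale = 2
factor = 2 ** scale
block_shape = (50, 64, 64)
scaled_block_shape = tuple(bs * factor for bs in block_shape)
assert scaled_block_shape == (200, 256, 256)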
Example #6
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4:
            shape = shape[1:]

        # load the watershed config
        ws_config = self.get_task_config()

        # require output dataset
        # TODO read chunks from config
        chunks = tuple(bs // 2 for bs in block_shape)
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # update the config with input and output paths and keys
        # as well as block shape
        ws_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'seeds_path': self.seeds_path,
            'seeds_key': self.seeds_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape
        })
        if self.mask_path != '':
            assert self.mask_key != ''
            ws_config.update({
                'mask_path': self.mask_path,
                'mask_key': self.mask_key
            })

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, ws_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
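The chunk derivation in this example (half the block shape, clipped to the volume) keeps chunk boundaries aligned with block boundaries, so each block writes whole chunks. A worked example with hypothetical shapes:

block_shape = (64, 512, 512)
shape = (50, 2048, 2048)
chunks = tuple(bs // 2 for bs in block_shape)                 # half the block shape
chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))  # clip to the volume
assert chunks == (32, 256, 256)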
Example #7
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        # load the create_multiset config
        config = self.get_task_config()

        compression = config.get('compression', 'gzip')
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=shape, chunks=tuple(block_shape),
                              compression=compression, dtype='uint8')

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'block_shape': block_shape})
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        self._write_log('scheduling %i blocks to be processed' % len(block_list))
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #8
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        with vu.file_reader(self.problem_path, 'r') as f:
            shape = f[self.graph_key].attrs['shape']
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = self.get_task_config()
        tmp_file = os.path.join(self.tmp_folder, 'stitch_edges.n5')
        config.update({
            'input_path': tmp_file,
            'problem_path': self.problem_path,
            'features_key': self.features_key,
            'graph_key': self.graph_key,
            'assignments_path': self.assignments_path,
            'assignments_key': self.assignments_key,
            'edge_size_threshold': self.edge_size_threshold,
            'serialize_edges': self.serialize_edges,
            'n_jobs': n_jobs
        })

        with vu.file_reader(tmp_file) as f:
            f.require_group('job_results')

        # we only have a single job to find the labeling
        self.prepare_jobs(1, None, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1)
Example #9
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # update the config with the path, key, tmp folder and block shape
        config.update({
            'path': self.path,
            'key': self.key,
            'tmp_folder': self.tmp_folder,
            'block_shape': block_shape
        })

        with vu.file_reader(self.path, 'r') as f:
            shape = f[self.key].shape

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #10
    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        out_shape = (self.number_of_labels, 11)
        out_chunks = (min(self.number_of_labels, 100000), 11)
        block_list = vu.blocks_in_volume([out_shape[0]], [out_chunks[0]])

        # create output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=out_shape,
                              chunks=out_chunks, compression='gzip',
                              dtype='float64')

        # update the config with the input and output paths and keys
        # as well as the output shape and chunks
        config.update({'input_path': self.input_path,
                       'input_key': self.input_key,
                       'output_path': self.output_path,
                       'output_key': self.output_key,
                       'out_shape': out_shape,
                       'out_chunks': out_chunks})

        # prime and run the jobs
        self.prepare_jobs(self.max_jobs, block_list, config, self.prefix)
        self.submit_jobs(self.max_jobs, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.prefix)
        self.check_jobs(self.max_jobs, self.prefix)
Example #11
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()
        chunk_size = min(10000, self.number_of_labels)

        # require the output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, dtype='float32', shape=(self.number_of_labels,),
                              chunks=(chunk_size,), compression='gzip')

        # temporary output dataset
        tmp_path = os.path.join(self.tmp_folder, 'region_features_tmp.n5')
        tmp_key = 'block_feats'
        # update the task config
        config.update({'output_path': self.output_path, 'output_key': self.output_key,
                       'tmp_path': tmp_path, 'tmp_key': tmp_key,
                       'node_chunk_size': chunk_size})

        node_block_list = vu.blocks_in_volume([self.number_of_labels], [chunk_size])

        n_jobs = min(len(node_block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, node_block_list, config, consecutive_blocks=True)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
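consecutive_blocks=True is not defined in these snippets; from its use with chunked 1d outputs, the plausible reading (an assumption, not the library's code) is that every job receives a contiguous slice of the block list, so the chunks it writes form one consecutive range. A sketch of such a partition:

def split_consecutive(block_list, n_jobs):
    # distribute the blocks so that each job gets one contiguous slice
    per_job, rest = divmod(len(block_list), n_jobs)
    out, off = [], 0
    for job in range(n_jobs):
        n = per_job + (1 if job < rest else 0)
        out.append(block_list[off:off + n])
        off += n
    return out

assert split_consecutive(list(range(10)), 3) == [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]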
Example #12
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        config = self.get_task_config()
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'filter_name': self.filter_name, 'sigma': self.sigma,
                       'halo': self.halo, 'block_shape': block_shape})

        shape = vu.get_shape(self.input_path, self.input_key)
        chunks = tuple(min(bs // 2, sh) for bs, sh in zip(block_shape, shape))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=shape, dtype='float32',
                              compression='gzip', chunks=chunks)

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #13
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # get shape and check dimension and channel param
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4 and self.channel is None:
            raise RuntimeError("Got 4d input, but channel was not specified")
        if len(shape) == 4 and self.channel >= shape[0]:
            raise RuntimeError("Channel %i is too large for %i channels" %
                               (self.channel, shape[0]))
        if len(shape) == 3 and self.channel is not None:
            raise RuntimeError("Channel was specified, but input is only 3d")

        if len(shape) == 4:
            shape = shape[1:]

        # temporary output dataset
        output_path = os.path.join(self.tmp_folder, 'region_features_tmp.n5')
        output_key = 'block_feats'

        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'labels_path': self.labels_path,
            'labels_key': self.labels_key,
            'output_path': output_path,
            'output_key': output_key,
            'block_shape': block_shape,
            'channel': self.channel
        })

        # require the temporary output data-set
        f_out = z5py.File(output_path)
        f_out.require_dataset(output_key,
                              shape=shape,
                              compression='gzip',
                              chunks=tuple(block_shape),
                              dtype='float32')

        if self.n_retries == 0:
            # get shape and make block config
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config, self.prefix)
        self.submit_jobs(n_jobs, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, self.prefix)
Example #14
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = self.get_task_config()
        config.update({
            'shape': self.shape,
            'offsets_path': self.offsets_path,
            'overlap_prefix': self.overlap_prefix,
            'save_prefix': self.save_prefix,
            'overlap_threshold': self.overlap_threshold,
            'block_shape': block_shape,
            'tmp_folder': self.tmp_folder,
            'halo': self.halo
        })

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #15
    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        if self.compute_cell_features:
            config = self._update_config_for_cells(config)
        else:
            config = self._update_config_for_nuclei(config)

        # TODO match block size and number of blocks
        # we hard-code the chunk-size to 1000 for now
        number_of_labels = self._get_number_of_labels()
        block_len = self._compute_block_len(number_of_labels)
        block_list = vu.blocks_in_volume([number_of_labels], [block_len])
        config.update({'block_len': block_len,
                       'compute_cell_features': self.compute_cell_features,
                       'number_of_labels': number_of_labels})

        prefix = 'cells' if self.compute_cell_features else 'nuclei'
        # prime and run the job
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, config, prefix)
        self.submit_jobs(n_jobs, prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, prefix)
Example #16
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4:
            shape = shape[1:]

        # load the agglomerate config
        config = self.get_task_config()

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'block_shape': block_shape, 'have_ignore_label': self.have_ignore_label})

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        self._write_log('scheduling %i blocks to be processed' % len(block_list))
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #17
def debug_vol():
    path = '../data.n5'
    key = 'volumes/cilia/segmentation'
    f = open_file(path)
    ds = f[key]
    shape = ds.shape
    block_shape = ds.chunks

    roi_begin = [7216, 12288, 7488]
    roi_end = [8640, 19040, 11392]

    blocks, blocking = blocks_in_volume(shape,
                                        block_shape,
                                        roi_begin,
                                        roi_end,
                                        return_blocking=True)
    print("Have", len(blocks), "blocks in roi")

    # check reading all blocks
    for block_id in blocks:
        print("Check block", block_id)
        block = blocking.getBlock(block_id)
        bb = block_to_bb(block)
        d = ds[bb]
        print("Have block", block_id)

    print("All checks passsed")
Example #18
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        assert len(shape) == 4, "Need 4d input for MWS"
        n_channels = shape[0]
        shape = shape[1:]

        # TODO make optional which channels to choose
        assert len(self.offsets) == n_channels,\
            "%i, %i" % (len(self.offsets), n_channels)
        assert all(len(off) == 3 for off in self.offsets)

        config = self.get_task_config()
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'offsets': self.offsets,
            'halo': self.halo
        })

        # check if we have a mask and add to the config if we do
        if self.mask_path != '':
            assert self.mask_key != ''
            config.update({
                'mask_path': self.mask_path,
                'mask_key': self.mask_key
            })

        # get chunks
        chunks = config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)
        # clip chunks
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))

        # make output dataset
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              dtype='uint64',
                              compression=compression,
                              chunks=chunks)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)
        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #19
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        shape = vu.get_shape(self.labels_path, self.labels_key)
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        graph_key = 's0/graph'
        with vu.file_reader(self.graph_path, 'r') as f:
            n_edges = f[graph_key].attrs['numberOfEdges']

        config = self.get_task_config()
        tmp_file = os.path.join(self.tmp_folder, 'stitch_edges.n5')
        config.update({
            'out_path': tmp_file,
            'graph_path': self.graph_path,
            'labels_path': self.labels_path,
            'labels_key': self.labels_key,
            'n_edges': n_edges,
            'block_shape': block_shape
        })

        with vu.file_reader(tmp_file) as f:
            f.require_group('job_results')

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #20
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = self.get_task_config()
        config.update({
            'output_path': self.output_path,
            'output_key': self.output_key,
            'tmp_folder': self.tmp_folder,
            'n_jobs': n_jobs,
            'number_of_labels': int(self.number_of_labels)
        })

        # we only have a single job to find the labeling
        self.prepare_jobs(1, None, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1)
Example #21
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)

        # we don't need any additional config besides the paths
        config = {
            "input_path": self.input_path,
            "input_key": self.input_key,
            "block_shape": block_shape,
            "tmp_folder": self.tmp_folder,
            "return_counts": self.return_counts
        }
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #22
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        # load the downscale_multiset config
        config = self.get_task_config()

        compression = config.get('compression', 'gzip')
        out_shape = downscale_shape(shape, self.scale_factor)
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=out_shape,
                              chunks=tuple(block_shape),
                              compression=compression,
                              dtype='uint8')

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'scale_factor': self.scale_factor,
            'restrict_set': self.restrict_set,
            'effective_scale_factor': self.effective_scale_factor,
            'block_shape': block_shape
        })

        # if we have a roi, we need to adjust it given the effective scaling factor
        if roi_begin is not None:
            roi_begin = [
                rb // eff
                for rb, eff in zip(roi_begin, self.effective_scale_factor)
            ]
            roi_end = [
                re // eff
                for re, eff in zip(roi_end, self.effective_scale_factor)
            ]

        block_list = vu.blocks_in_volume(out_shape, block_shape, roi_begin,
                                         roi_end)
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))
        n_jobs = min(len(block_list), self.max_jobs)
        self._write_log("submitting %i blocks with %i jobs" %
                        (len(block_list), n_jobs))

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config, self.scale_prefix)
        self.submit_jobs(n_jobs, self.scale_prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, self.scale_prefix)
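The roi adjustment in this example maps full-resolution roi coordinates into the downscaled volume by floor-dividing each axis with the effective scale factor. With a hypothetical factor of (2, 4, 4):

effective_scale_factor = (2, 4, 4)
roi_begin = [100, 2048, 2048]
roi_end = [228, 4096, 4096]
roi_begin = [rb // eff for rb, eff in zip(roi_begin, effective_scale_factor)]
roi_end = [re // eff for re, eff in zip(roi_end, effective_scale_factor)]
assert roi_begin == [50, 512, 512] and roi_end == [114, 1024, 1024]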
Example #23
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # update the config with the output path, tmp folder
        # and the number of jobs
        config.update({
            'output_path': self.output_path,
            'tmp_folder': self.tmp_folder,
            'n_jobs': n_jobs
        })

        # prime and run the jobs
        self.prepare_jobs(1, None, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1)
Example #24
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        config = {
            'input_path': self.input_path,
            'input_key': self.input_key,
            'tmp_folder': self.tmp_folder,
            'n_jobs': n_jobs,
            'size_threshold': self.size_threshold
        }

        # we only have a single job to find the labeling
        self.prepare_jobs(1, None, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        # log the save-path again
        save_path = os.path.join(self.tmp_folder, 'discard_ids.npy')
        self._write_log("saving results to %s" % save_path)
        self.check_jobs(1)
Example #25
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        config = self.get_task_config()
        config.update({
            'affinity_path': self.affinity_path,
            'affinity_key': self.affinity_key,
            'objects_path': self.objects_path,
            'objects_key': self.objects_key,
            'offsets': self.offsets,
            'block_shape': block_shape
        })

        shape = vu.get_shape(self.affinity_path, self.affinity_key)[1:]
        chunks = vu.file_reader(
            self.affinity_path)[self.affinity_key].chunks[1:]
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks))

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
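The assertion in this example requires the processing blocks to align with the dataset chunks, so that every block touches whole chunks only. A worked example with hypothetical shapes:

block_shape = (64, 512, 512)
assert all(bs % ch == 0 for bs, ch in zip(block_shape, (32, 256, 256)))      # aligned
assert not all(bs % ch == 0 for bs, ch in zip(block_shape, (32, 300, 256)))  # misaligned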
Example #26
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        with vu.file_reader(self.features_path) as f:
            feat_shape = f[self.features_key].shape
        n_edges = feat_shape[0]
        # chunk size = 64**3
        chunk_size = min(262144, n_edges)

        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=(n_edges, ),
                              compression='gzip',
                              dtype='float32',
                              chunks=(chunk_size, ))
            f.require_dataset(self.output_labels_key,
                              shape=(n_edges, ),
                              compression='gzip',
                              dtype='uint32',
                              chunks=(chunk_size, ))

        # update the config with the rf, feature and output paths and keys
        config.update({
            'rf_path': self.rf_path,
            'features_path': self.features_path,
            'features_key': self.features_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'output_labels_key': self.output_labels_key,
            'chunk_size': chunk_size,
            'n_edges': n_edges,
            'edge_classes': self.edge_classes
        })

        if self.n_retries == 0:
            edge_block_list = vu.blocks_in_volume([n_edges], [chunk_size])
        else:
            edge_block_list = self.block_list
            self.clean_up_for_retry(edge_block_list)

        n_jobs = min(len(edge_block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs,
                          edge_block_list,
                          config,
                          consecutive_blocks=True)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #27
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, global_roi_begin, global_roi_end = self.global_config_values()
        self.init(shebang)

        assert (self.roi_begin is None) == (self.roi_end is None),\
            "Either both or neither of `roi_begin` and `roi_end` must be specified"
        # if we don't have a task-specific roi, set the roi to the global roi
        if self.roi_begin is None:
            roi_begin = global_roi_begin
            roi_end = global_roi_end
        else:
            # otherwise set to task-specific roi
            roi_begin = self.roi_begin
            roi_end = self.roi_end
            # if we also have a global roi, check that the task-specific roi
            # is in the global roi
            if global_roi_begin is not None:
                assert all(rb >= grb for rb, grb in zip(roi_begin, global_roi_begin))
            if global_roi_end is not None:
                assert all(re <= geb for eb, geb in zip(roi_end, global_roi_end))

        # read shape
        with vu.file_reader(self.problem_path, 'r') as f:
            shape = tuple(f.attrs['shape'])

        # make output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=shape, dtype='uint64',
                              chunks=(25, 256, 256), compression='gzip')

        factor = 2**self.scale
        block_shape = tuple(bs * factor for bs in block_shape)

        # update the config with the problem, ws and output paths and keys
        # as well as block shape
        config = self.get_task_config()
        config.update({'problem_path': self.problem_path, 'scale': self.scale,
                       'block_shape': block_shape,
                       'ws_path': self.ws_path, 'ws_key': self.ws_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'sub_graph_identifier': self.sub_graph_identifier,
                       'sub_result_identifier': self.sub_result_identifier})

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        # prime and run the jobs
        prefix = 's%i' % self.scale
        self.prepare_jobs(1, block_list, config, prefix)
        self.submit_jobs(1, prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1, prefix)
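The containment check in this example reads: a task-specific roi must lie inside the global roi on every axis. For example:

global_roi_begin, global_roi_end = [0, 0, 0], [100, 1024, 1024]
roi_begin, roi_end = [10, 256, 256], [90, 768, 768]
assert all(rb >= grb for rb, grb in zip(roi_begin, global_roi_begin))
assert all(re <= gre for re, gre in zip(roi_end, global_roi_end))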
Example #28
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end, block_list_path\
            = self.global_config_values(with_block_list_path=True)
        self.init(shebang)

        with vu.file_reader(self.problem_path, 'r') as f:
            shape = tuple(f.attrs['shape'])

        factor = 2**self.scale
        block_shape = tuple(bs * factor for bs in block_shape)

        # update the config with the problem path, scale,
        # block shape and lifted prefix
        config = self.get_task_config()
        config.update({
            'problem_path': self.problem_path,
            'scale': self.scale,
            'block_shape': block_shape,
            'lifted_prefix': self.lifted_prefix
        })

        # make output datasets
        out_key = 's%i/sub_results_lmc' % self.scale
        with vu.file_reader(self.problem_path) as f:
            out = f.require_group(out_key)
            # NOTE, gzip may fail for very small inputs, so we use raw compression for now
            # might be a good idea to give blosc a shot ...
            out.require_dataset('cut_edge_ids',
                                shape=shape,
                                chunks=block_shape,
                                compression='raw',
                                dtype='uint64')
            out.require_dataset('node_result',
                                shape=shape,
                                chunks=block_shape,
                                compression='raw',
                                dtype='uint64')

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end, block_list_path)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        prefix = 's%i' % self.scale
        self.prepare_jobs(n_jobs, block_list, config, prefix)
        self.submit_jobs(n_jobs, prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, prefix)
Example #29
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # update the config with the ws, input and output paths and keys
        # as well as block shape
        config.update({
            'ws_path': self.ws_path,
            'ws_key': self.ws_key,
            'input_path': self.input_path,
            'input_key': self.input_key,
            'block_shape': block_shape,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'ignore_label': self.ignore_label
        })

        shape = vu.get_shape(self.ws_path, self.ws_key)
        chunks = tuple(min(bs, sh) for bs, sh in zip(block_shape, shape))
        try:
            max_id = vu.file_reader(self.ws_path,
                                    'r')[self.ws_key].attrs['maxId']
        except KeyError:
            raise KeyError("Dataset %s:%s does not have attribute maxId" %
                           (self.ws_path, self.ws_key))

        # create output dataset
        with vu.file_reader(self.output_path) as f:
            ds_out = f.require_dataset(self.output_key,
                                       shape=shape,
                                       dtype='uint64',
                                       chunks=chunks,
                                       compression='gzip')
            # need to serialize the label max-id here for
            # the merge_node_labels task
            ds_out.attrs['maxId'] = int(max_id)

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config, self.prefix)
        self.submit_jobs(n_jobs, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, self.prefix)
Example #30
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        # require output dataset
        chunks = tuple(bs // 2 if bs % 2 == 0 else bs for bs in block_shape)
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))
        with vu.file_reader(self.output_path) as f:
            if self.output_key in f:
                chunks = f[self.output_key].chunks
            assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks)), "%s, %s" % (str(block_shape),
                                                                                         str(chunks))
            f.require_dataset(self.output_key, shape=shape, chunks=chunks,
                              compression='gzip', dtype='uint64')

        n_threads = self.get_task_config().get('threads_per_core', 1)

        # check if input and output datasets are identical
        in_place = (self.input_path == self.output_path) and (self.input_key == self.output_key)

        if self.assignment_key is None:
            assert os.path.splitext(self.assignment_path)[-1] == '.pkl',\
                "Assignments need to be pickled map if no key is given"

        # update the config with input and output paths and keys
        # as well as block shape
        config = {'input_path': self.input_path, 'input_key': self.input_key,
                  'block_shape': block_shape, 'n_threads': n_threads,
                  'assignment_path': self.assignment_path, 'assignment_key': self.assignment_key}
        if self.offset_path != '':
            config.update({'offset_path': self.offset_path})
        # we only add output path and key if we do not write in place
        if not in_place:
            config.update({'output_path': self.output_path, 'output_key': self.output_key})

        # get block list and jobs
        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list, self.identifier)
        self._write_log('scheduling %i blocks to be processed' % len(block_list))

        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config, self.identifier)
        self.submit_jobs(n_jobs, self.identifier)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.identifier)
        self.check_jobs(n_jobs, self.identifier)
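The in-place check in this last example decides whether output entries go into the config at all: when input and output refer to the same dataset, the workers write back into their input. A worked example with hypothetical paths:

def needs_output_entries(input_path, input_key, output_path, output_key):
    # in-place means input and output are the same dataset
    in_place = (input_path == output_path) and (input_key == output_key)
    return not in_place

assert not needs_output_entries('a.n5', 'seg', 'a.n5', 'seg')  # in-place
assert needs_output_entries('a.n5', 'seg', 'b.n5', 'seg')      # separate output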