def solve(predict_config, worker_config, data_config, graph_config,
          solve_config, num_block_workers, block_size, roi_offset, roi_size,
          context, solve_block, base_dir, experiment, train_number,
          predict_number, graph_number, solve_number, queue,
          singularity_container, mount_dirs, **kwargs):
    '''Run the solve step block-wise by spawning daisy workers.

    The ROI given by ``roi_offset`` and ``roi_size`` is grown by ``context``
    on both sides to form the total ROI. Each block writes a region of
    ``block_size`` and reads that region grown by ``context``.

    The five ``*_config`` arguments, ``queue``, ``singularity_container``,
    ``mount_dirs`` and ``solve_block`` are handed to ``start_worker``
    unchanged, together with the setup directory
    ``<base_dir>/<experiment>/04_solve/setup_t{train}_p{predict}_g{graph}_s{solve}``.
    ``num_block_workers`` is the number of daisy workers. Additional keyword
    arguments are accepted and ignored.
    '''
    experiment_dir = os.path.join(base_dir, experiment)
    setup_name = "04_solve/setup_t{}_p{}_g{}_s{}".format(
        train_number, predict_number, graph_number, solve_number)
    solve_setup_dir = os.path.join(experiment_dir, setup_name)

    offset = daisy.Coordinate(roi_offset)
    size = daisy.Coordinate(roi_size)
    total_roi = daisy.Roi(offset, size).grow(context, context)

    block_write_roi = daisy.Roi((0, 0, 0), block_size)
    block_read_roi = block_write_roi.grow(context, context)

    def launch_worker():
        # Called by daisy without arguments, once per worker.
        return start_worker(
            predict_config,
            worker_config,
            data_config,
            graph_config,
            solve_config,
            queue,
            singularity_container,
            mount_dirs,
            solve_block,
            solve_setup_dir)

    logger.info("Solving in %s", total_roi)

    daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=launch_worker,
        num_workers=num_block_workers,
        fit='shrink')

    logger.info("Finished solving, parameters id is %s", solve_number)
def fetch(in_vol, voxel_size, roi_offset, roi_shape, out_file, out_ds,
          num_workers):
    '''Copy a region of ``in_vol`` block-wise into a newly prepared dataset.

    A ``uint8`` dataset ``out_ds`` is prepared in ``out_file`` covering the
    ROI given by ``roi_offset`` and ``roi_shape``. The ROI is then processed
    in blocks of fixed shape ``(4800, 1280, 1280)`` (read and write ROI are
    identical), calling ``fetch_in_block`` for each block with
    ``num_workers`` daisy workers.
    '''
    total_roi = daisy.Roi((roi_offset), (roi_shape))

    # Read and write ROI are the same object: no context is needed.
    block_roi = daisy.Roi((0, ) * 3, (4800, 1280, 1280))

    logging.info('Creating out dataset...')
    raw_out = daisy.prepare_ds(
        out_file,
        out_ds,
        total_roi,
        voxel_size,
        dtype=np.uint8,
        write_roi=block_roi)

    def process(block):
        return fetch_in_block(block, voxel_size, in_vol, raw_out)

    logging.info('Writing to dataset...')
    daisy.run_blockwise(
        total_roi,
        block_roi,
        block_roi,
        process_function=process,
        fit='shrink',
        num_workers=num_workers)
def overlap():
    '''Compute the fragment/ground-truth overlap block-wise and reduce it.

    All inputs are read from the module-level ``config`` object (fragment
    and ground-truth datasets, ROI offset/shape, block size, number of
    workers, temp path). Per-block results are written by
    ``overlap_in_block`` into a fresh, timestamped sub-directory of
    ``config.temp_path``; ``overlap_reduce`` combines them. The reduced
    result is also pickled to ``frag_to_gt.pickle`` in that directory.

    Returns:

        The object returned by ``overlap_reduce``.
    '''
    run_datetime = datetime.datetime.now(
        pytz.timezone('US/Eastern')).strftime('%Y%m%dT%H%M%S.%f%z')
    temp_dir = os.path.join(config.temp_path, run_datetime)
    os.makedirs(temp_dir)

    fragments = daisy.open_ds(config.fragments_zarr, config.fragments_ds)
    groundtruth = daisy.open_ds(config.groundtruth_zarr,
                                config.groundtruth_ds)
    total_roi = daisy.Roi(offset=config.roi_offset, shape=config.roi_shape)

    start = time.time()
    daisy.run_blockwise(
        total_roi=total_roi,
        read_roi=daisy.Roi(offset=(0, 0, 0), shape=config.block_size),
        write_roi=daisy.Roi(offset=(0, 0, 0), shape=config.block_size),
        process_function=lambda block: overlap_in_block(
            block=block,
            fragments=fragments,
            groundtruth=groundtruth,
            tmp_path=temp_dir),
        fit='shrink',
        num_workers=config.num_workers,
        read_write_conflict=False,
        max_retries=1)
    logger.info(
        f"Blockwise overlapping of fragments and ground truth in {time.time() - start:.3f}s")
    logger.debug(
        f"num blocks: {np.prod(np.ceil(np.array(config.roi_shape) / np.array(config.block_size)))}")

    frag_to_gt = overlap_reduce(tmp_path=temp_dir)

    # Use a context manager so the pickle file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(os.path.join(temp_dir, 'frag_to_gt.pickle'), 'wb') as f:
        pickle.dump(frag_to_gt, f)

    return frag_to_gt
def relabel_connected_components(array_in, array_out, block_size,
                                 num_workers):
    '''Relabel connected components in an array in parallel.

    Works in two block-wise passes: the first calls
    ``find_components_in_block`` on blocks that are grown by one voxel and
    is given a temporary directory; the cross-block merges are then read
    back from that directory and resolved with ``find_components``. The
    second pass calls ``relabel_in_block`` on ``array_out`` with the
    resulting components.

    Args:

        array_in (``daisy.Array``):

            The array to relabel.

        array_out (``daisy.Array``):

            The array to write to. Should initially be empty (i.e., all
            zeros).

        block_size (``daisy.Coordinate``):

            The size of the blocks to relabel in, in world units.

        num_workers (``int``):

            The number of workers to use.
    '''
    origin = (0, ) * len(block_size)
    voxel_size = array_in.voxel_size

    # First pass: blocks read one voxel of context on every side.
    block_write_roi = daisy.Roi(origin, block_size)
    block_read_roi = block_write_roi.grow(voxel_size, voxel_size)
    grown_total_roi = array_in.roi.grow(voxel_size, voxel_size)

    num_voxels_in_block = (block_read_roi / voxel_size).size()

    with tempfile.TemporaryDirectory() as tmpdir:

        def find_in_block(block):
            return find_components_in_block(
                array_in,
                array_out,
                num_voxels_in_block,
                block,
                tmpdir)

        daisy.run_blockwise(
            grown_total_roi,
            block_read_roi,
            block_write_roi,
            process_function=find_in_block,
            num_workers=num_workers,
            fit='shrink')

        # Must happen before the temporary directory is removed.
        nodes, edges = read_cross_block_merges(tmpdir)

    components = find_components(nodes, edges)

    logger.debug("Num nodes: %s", len(nodes))
    logger.debug("Num edges: %s", len(edges))
    logger.debug("Num components: %s", len(components))

    # Second pass: no context, read and write ROI coincide.
    relabel_roi = daisy.Roi(origin, block_size)

    def relabel(block):
        return relabel_in_block(array_out, nodes, components, block)

    daisy.run_blockwise(
        array_in.roi,
        relabel_roi,
        relabel_roi,
        process_function=relabel,
        num_workers=num_workers,
        fit='shrink')
def extract_edges_blockwise(db_host, db_name, sample, edge_move_threshold,
                            block_size, num_workers, frames=None,
                            frame_context=1, data_dir='../01_data',
                            use_pv_distance=False, **kwargs):
    '''Extract edges block-wise over the (4D) ROI of a sample.

    The source ROI is obtained from ``get_source_roi(data_dir, sample)``.
    If ``frames`` is given (a ``(begin, end)`` pair), the ROI is cropped in
    its first dimension to ``[begin - frame_context, end + frame_context)``.

    Blocks of ``block_size`` (world units) read additional context: in the
    first dimension one unit in the negative direction only, in the other
    three dimensions ``edge_move_threshold`` in both directions.

    ``extract_edges_in_block`` is called per block and ``check_function``
    is used as daisy's check function. Additional keyword arguments are
    accepted and ignored.
    '''
    voxel_size, source_roi = get_source_roi(data_dir, sample)

    # limit to specific frames, if given
    if frames:
        first, last = frames
        begin = first - frame_context
        end = last + frame_context
        frame_crop = daisy.Roi(
            (begin, None, None, None),
            (end - begin, None, None, None))
        source_roi = source_roi.intersect(frame_crop)

    # block size in world units
    block_write_roi = daisy.Roi((0, ) * 4, daisy.Coordinate(block_size))

    spatial_context = (edge_move_threshold, ) * 3
    pos_context = daisy.Coordinate((0, ) + spatial_context)
    neg_context = daisy.Coordinate((1, ) + spatial_context)
    logger.debug("Set neg context to %s", neg_context)

    input_roi = source_roi.grow(neg_context, pos_context)
    block_read_roi = block_write_roi.grow(neg_context, pos_context)

    print("Following ROIs in world units:")
    print("Input ROI       = %s" % input_roi)
    print("Block read  ROI = %s" % block_read_roi)
    print("Block write ROI = %s" % block_write_roi)
    print("Output ROI      = %s" % source_roi)

    print("Starting block-wise processing...")

    def process(block):
        return extract_edges_in_block(
            db_name,
            db_host,
            edge_move_threshold,
            block,
            use_pv_distance=use_pv_distance)

    def already_done(block):
        return check_function(block, 'extract_edges', db_name, db_host)

    # process block-wise
    daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        process_function=process,
        check_function=already_done,
        num_workers=num_workers,
        processes=True,
        read_write_conflict=False,
        fit='shrink')
def prepare_for_fragments(self):
    '''Get the fragment ID for each site in site_ids.

    If ``self.site_fragment_lut_directory`` does not exist yet, it is
    created and filled block-wise by ``self.store_lut_in_block`` over
    ``self.roi``. All ``*.npz`` files in that directory are then read: their
    ``site_fragment_lut`` arrays (row 0: site IDs, row 1: fragment IDs) are
    concatenated and their ``num_bg_sites`` values summed.

    Sets:

        ``self.num_bg_sites`` (``int``) and ``self.site_fragment_ids``
        (``uint64`` array congruent to ``self.site_ids``; sites missing
        from the LUT get fragment ID 0).

    Raises:

        ValueError: If the LUT directory contains no ``*.npz`` files.
    '''

    logging.info(f"Preparing evaluation for fragments in "
                 f"{self.fragments_file}...")

    if not os.path.exists(self.site_fragment_lut_directory):

        logging.info("site-fragment LUT does not exist, creating it...")

        os.makedirs(self.site_fragment_lut_directory)
        daisy.run_blockwise(
            self.roi,
            daisy.Roi((0, 0, 0), (9000, 9000, 9000)),
            daisy.Roi((0, 0, 0), (9000, 9000, 9000)),
            lambda b: self.store_lut_in_block(b),
            num_workers=48,
            fit='shrink')

    else:

        logging.info(
            "site-fragment LUT already exists, skipping preparation")

    logging.info("Reading site-fragment LUTs from "
                 f"{self.site_fragment_lut_directory}...")

    lut_files = glob.glob(
        os.path.join(self.site_fragment_lut_directory, '*.npz'))
    if not lut_files:
        # np.concatenate would raise an opaque ValueError on an empty list.
        raise ValueError(
            "No site-fragment LUT files (*.npz) found in "
            f"{self.site_fragment_lut_directory}")

    # Open every archive exactly once and close it again; the arrays are
    # fully read while the archive is open.
    luts = []
    bg_counts = []
    for lut_file in lut_files:
        with np.load(lut_file) as npz:
            luts.append(npz['site_fragment_lut'])
            bg_counts.append(npz['num_bg_sites'])

    site_fragment_lut = np.concatenate(luts, axis=1)
    self.num_bg_sites = int(np.sum(bg_counts))

    assert site_fragment_lut.dtype == np.uint64

    logging.info(
        f"Found {len(site_fragment_lut[0])} sites in site-fragment LUT")

    # convert to dictionary (site ID -> fragment ID)
    site_to_fragment = dict(zip(site_fragment_lut[0], site_fragment_lut[1]))

    # create fragment ID array congruent to site_ids
    self.site_fragment_ids = np.array(
        [site_to_fragment.get(s, 0) for s in self.site_ids],
        dtype=np.uint64)
def extract_edges(
        db_host,
        db_name,
        soft_mask_container,
        soft_mask_dataset,
        roi_offset,
        roi_size,
        distance_threshold,
        block_size,
        num_block_workers,
        graph_number,
        **kwargs):
    '''Extract edges block-wise over a 3D ROI.

    The ROI given by ``roi_offset`` and ``roi_size`` is the output ROI. It
    is grown by ``distance_threshold`` in every direction to form the input
    ROI; blocks of ``block_size`` read the same amount of context.
    ``extract_edges_in_block`` is called per block with
    ``num_block_workers`` worker processes. Additional keyword arguments
    are accepted and ignored.
    '''

    # Define Rois:
    output_roi = daisy.Roi(roi_offset, roi_size)
    block_write_roi = daisy.Roi((0,) * 3, daisy.Coordinate(block_size))

    # The context is symmetric: same amount in both directions.
    threshold_per_dim = (distance_threshold,) * 3
    pos_context = daisy.Coordinate(threshold_per_dim)
    neg_context = daisy.Coordinate(threshold_per_dim)
    logger.debug("Set pos context to %s", pos_context)
    logger.debug("Set neg context to %s", neg_context)

    input_roi = output_roi.grow(neg_context, pos_context)
    block_read_roi = block_write_roi.grow(neg_context, pos_context)

    logger.info("Following ROIs in world units:")
    logger.info("Input ROI       = %s" % input_roi)
    logger.info("Block read  ROI = %s" % block_read_roi)
    logger.info("Block write ROI = %s" % block_write_roi)
    logger.info("Output ROI      = %s" % output_roi)

    logger.info("Starting block-wise processing...")

    def process(block):
        return extract_edges_in_block(
            db_name,
            db_host,
            soft_mask_container,
            soft_mask_dataset,
            distance_threshold,
            graph_number,
            block)

    # process block-wise
    daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        process_function=process,
        num_workers=num_block_workers,
        processes=True,
        read_write_conflict=False,
        fit='shrink')
def test_negative_offset(self):
    '''A 1D total ROI with a negative offset is processed in 5 blocks.'''

    logger.warning("A warning")

    write_roi = daisy.Roi((0,), (500,))
    read_roi = write_roi.grow((100,), (100,))
    total_roi = daisy.Roi((-100,), (2369,))

    outdir = self.path_to()

    def process(block):
        return self.process_block(outdir, block)

    ret = daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        process_function=process,
        num_workers=1,
        fit='shrink')

    # Output files are named "<block id>.block".
    block_ids = sorted(
        int(path.split('/')[-1].split('.')[0])
        for path in glob.glob(os.path.join(outdir, '*.block')))

    self.assertTrue(ret)
    self.assertEqual(len(block_ids), 5)
def test_worker_failure(self):
    '''With a worker failing on block 16, the run fails and only that
    block's output file is missing.'''

    outdir = self.path_to()

    def spawn_worker():
        # Worker-style process function: called without a block.
        return self.worker(outdir, fail=16)

    ret = daisy.run_blockwise(
        total_roi=daisy.Roi((0,), (100,)),
        read_roi=daisy.Roi((0,), (5,)),
        write_roi=daisy.Roi((0,), (3,)),
        process_function=spawn_worker,
        num_workers=10)

    # Output files are named "<block id>.block".
    block_ids = sorted(
        int(path.split('/')[-1].split('.')[0])
        for path in glob.glob(os.path.join(outdir, '*.block')))

    self.assertFalse(ret)
    expected_block_ids = [i for i in range(32) if i != 16]
    self.assertEqual(block_ids, expected_block_ids)
def parallel_lsd_agglomerate(lsds, fragments, rag_provider, lsd_extractor,
                             block_size, context, num_workers):
    '''Agglomerate fragments in parallel using only the shape descriptors.

    Args:

        lsds (`class:daisy.Array`):

            An array containing the LSDs.

        fragments (`class:daisy.Array`):

            An array containing fragments. Must be of ``dtype`` ``uint64``.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to read nodes from and write found edges to.

        lsd_extractor (``LsdExtractor``):

            The local shape descriptor object used to compute the difference
            between the segmentation and the target LSDs.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel, in world units.

        context (``tuple`` of ``int``):

            The context to consider for agglomeration, in world units.

        num_workers (``int``):

            The number of parallel workers.

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments.data.dtype == np.uint64

    context = daisy.Coordinate(context)
    origin = (0, ) * lsds.roi.dims()

    # The total ROI and each block's read ROI include the context; only the
    # un-grown block is written.
    total_roi = lsds.roi.grow(context, context)
    write_roi = daisy.Roi(origin, block_size)
    read_roi = daisy.Roi(origin, block_size).grow(context, context)

    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: agglomerate_in_block(
            lsds,
            fragments,
            rag_provider,
            lsd_extractor,
            b),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
def test_multidim(self):
    '''A 4D total ROI with context is processed in 500 blocks.'''

    write_roi = daisy.Roi((0, 0, 0, 0), (5, 500, 500, 500))
    read_roi = write_roi.grow((1, 100, 100, 100), (1, 100, 100, 100))
    total_roi = daisy.Roi(
        (199, -100, -100, -100),
        (12, 5140, 2248, 2369))

    outdir = self.path_to()

    def process(block):
        return self.process_block(outdir, block)

    ret = daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        process_function=process,
        num_workers=8,
        processes=False,
        fit='shrink')

    # Output files are named "<block id>.block".
    block_ids = sorted(
        int(path.split('/')[-1].split('.')[0])
        for path in glob.glob(os.path.join(outdir, '*.block')))

    self.assertTrue(ret)
    self.assertEqual(len(block_ids), 500)
def downscale(in_array, out_array, factor, write_size, num_workers=60):
    '''Downscale ``in_array`` into ``out_array`` block-wise.

    The ROI of ``out_array`` is processed in blocks of ``write_size`` (read
    and write ROI are identical); ``downscale_block`` is called for every
    block. Failed blocks are not retried.

    Args:

        in_array: The array to read from (provides the dimensionality).

        out_array: The array to write to (provides the total ROI).

        factor: The downscaling factor, passed on to ``downscale_block``.

        write_size: The block size.

        num_workers (``int``, optional):

            The number of parallel workers. Defaults to 60, the value that
            was previously hard-coded.
    '''

    print("Downsampling by factor %s" % (factor, ))

    dims = in_array.roi.dims()
    block_roi = daisy.Roi((0, ) * dims, write_size)

    print("Processing ROI %s with blocks %s" % (out_array.roi, block_roi))

    daisy.run_blockwise(
        out_array.roi,
        block_roi,
        block_roi,
        process_function=lambda b: downscale_block(
            in_array,
            out_array,
            factor,
            b),
        read_write_conflict=False,
        num_workers=num_workers,
        max_retries=0,
        fit='shrink')
def solve(predict_config, worker_config, data_config, graph_config,
          solve_config, num_block_workers, block_size, roi_offset, roi_size,
          context, solve_block, graph_number, solve_number, queue,
          singularity_container, mount_dirs, **kwargs):
    '''Run the solve step block-wise by spawning daisy workers.

    The ROI given by ``roi_offset`` and ``roi_size`` is grown by ``context``
    on both sides to form the total ROI. Each block writes a region of
    ``block_size`` and reads that region grown by ``context``.

    The five ``*_config`` arguments, ``queue``, ``singularity_container``,
    ``mount_dirs`` and ``solve_block`` are handed to ``start_worker``
    unchanged, together with the fixed relative path ``solve_setup_dir``.
    ``num_block_workers`` is the number of daisy workers. ``graph_number``
    and any additional keyword arguments are accepted but not used here.
    '''
    solve_setup_dir = Path("solve_setup_dir")

    offset = daisy.Coordinate(roi_offset)
    size = daisy.Coordinate(roi_size)
    total_roi = daisy.Roi(offset, size).grow(context, context)

    block_write_roi = daisy.Roi((0, 0, 0), block_size)
    block_read_roi = block_write_roi.grow(context, context)

    def launch_worker():
        # Called by daisy without arguments, once per worker.
        return start_worker(
            predict_config,
            worker_config,
            data_config,
            graph_config,
            solve_config,
            queue,
            singularity_container,
            mount_dirs,
            solve_block,
            solve_setup_dir)

    logger.info("Solving in %s", total_roi)

    daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=launch_worker,
        num_workers=num_block_workers,
        fit="shrink")

    logger.info("Finished solving, parameters id is %s", solve_number)
def test_callback(self):
    '''A per-block callback over a 1D ROI succeeds and produces one output
    file for each of the block IDs 0..31.'''

    outdir = self.path_to()

    def process(block):
        return self.process_block(outdir, block)

    ret = daisy.run_blockwise(
        total_roi=daisy.Roi((0,), (100,)),
        read_roi=daisy.Roi((0,), (5,)),
        write_roi=daisy.Roi((0,), (3,)),
        process_function=process,
        num_workers=10)

    # Output files are named "<block id>.block".
    block_ids = sorted(
        int(path.split('/')[-1].split('.')[0])
        for path in glob.glob(os.path.join(outdir, '*.block')))

    self.assertTrue(ret)
    self.assertEqual(block_ids, list(range(32)))
def _predict_affinities_daisy():
    '''Command-line entry point: predict network outputs block-wise.

    Parses the command line, derives the world-space ROIs from the input
    dataset size, the voxel sizes and the network input/output shapes,
    creates the output datasets in the output N5 container and runs the
    prediction with ``daisy.run_blockwise``, using one daisy worker per GPU
    given in ``--gpus``.

    The output datasets are created with the compression given by
    ``--output-compression`` (default ``raw``), the same value that is
    passed to the pipeline factory.
    '''

    import pathlib
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input-container', type=str, required=True, help='N5 container')
    parser.add_argument(
        '--output-container', type=str, required=True, help='N5 container')
    parser.add_argument(
        '--input',
        type=str,
        nargs=2,
        metavar=('dataset', 'tensor'),
        help='For example --input volumes/raw Placeholder:0',
        required=True)
    parser.add_argument(
        '--output',
        type=str,
        action='append',
        nargs=4,
        metavar=('dataset', 'dtype', 'num_channels', 'tensor'),
        help=
        'For example --output volumes/affinities/prediction float32 3 Slice:0. num-channels<=0 means no channel axis',
        required=True)
    parser.add_argument('--gpus', required=True, type=int, nargs='+')
    parser.add_argument(
        '--num-workers',
        type=int,
        default=1,
        help='Number of CPU workers per GPU for parallel processing')
    parser.add_argument(
        '--input-voxel-size', nargs=3, type=int, default=(360, 36, 36),
        help='zyx')
    parser.add_argument(
        '--output-voxel-size', nargs=3, type=int, default=(120, 108, 108),
        help='zyx')
    parser.add_argument(
        '--network-input-shape', nargs=3, type=int, default=(91, 862, 862),
        help='zyx')
    parser.add_argument(
        '--network-output-shape', nargs=3, type=int,
        default=(207, 214, 214), help='zyx')
    parser.add_argument('--experiment-directory', required=True)
    parser.add_argument('--iteration', type=int, required=True)
    parser.add_argument(
        '--weight-graph-pattern',
        default='unet_checkpoint_%d',
        help='Relative to experiment-directory.')
    parser.add_argument(
        '--meta-graph-filename',
        default='unet-inference.meta',
        help='Relative to experiment-directory.')
    parser.add_argument('--output-compression', default='raw')
    parser.add_argument(
        '--net-io-names',
        default=None,
        required=False,
        help=
        'Look-up tensor names from json, if specified. Use specified values from --input/--output as tensor names directly, otherwise.'
    )

    args = parser.parse_args()

    input_voxel_size = np.array(args.input_voxel_size, dtype=np.float64)
    output_voxel_size = np.array(args.output_voxel_size, dtype=np.float64)

    # Tensor names are either looked up in a json file or used verbatim.
    if args.net_io_names:
        with open(args.net_io_names, 'r') as f:
            net_io_names = json.load(f)

        def tensor_name(name):
            return net_io_names[name]
    else:

        def tensor_name(name):
            return name

    experiment_directory = args.experiment_directory
    input_container = args.input_container
    output_container = pathlib.Path(args.output_container)
    output_dir = output_container.parent
    iteration = args.iteration

    # Network shapes are given in voxels; convert to world units.
    network_input_shape = np.array(args.network_input_shape, dtype=np.uint64)
    network_input_shape_world = np.array(
        tuple(n * i for n, i in zip(network_input_shape, input_voxel_size)),
        dtype=np.float64)
    network_output_shape = np.array(
        args.network_output_shape, dtype=np.uint64)
    network_output_shape_world = np.array(
        tuple(n * o for n, o in zip(network_output_shape, output_voxel_size)),
        dtype=np.float64)
    shape_diff_world = network_input_shape_world - network_output_shape_world

    print(args.input, args.output)
    inputs = tuple((ds, tensor_name(tensor)) for ds, tensor in [args.input])
    outputs = tuple(
        (ds, np.dtype(dtype), int(nc), tensor_name(tensor))
        for ds, dtype, nc, tensor in args.output)

    with z5py.File(
            path=input_container, use_zarr_format=False, mode='r') as f:
        ds = f[inputs[0][0]]
        input_dataset_size = ds.shape

    input_dataset_size_world = np.array(
        tuple(vs * s for vs, s in zip(input_voxel_size, input_dataset_size)),
        dtype=np.float64)

    # The output ROI covers the input extent, grown to a multiple of the
    # network output size.
    output_dataset_roi_world = Roi(
        shape=input_dataset_size_world,
        offset=np.array(
            (0, ) * len(input_dataset_size_world), dtype=np.float64))
    output_dataset_roi_world = output_dataset_roi_world.snap_to_grid(
        network_output_shape_world, mode='grow')
    output_dataset_roi = output_dataset_roi_world / tuple(output_voxel_size)

    _logger.info('input dataset size world:   %s', input_dataset_size_world)
    _logger.info('output dataset roi world:   %s', output_dataset_roi_world)
    _logger.info('output datset roi:          %s', output_dataset_roi)
    _logger.info('output network size:        %s', network_output_shape)
    _logger.info('output network size world:  %s', network_output_shape_world)

    weight_graph = os.path.join(experiment_directory,
                                args.weight_graph_pattern % iteration)
    meta_graph = os.path.join(experiment_directory, args.meta_graph_filename)

    # exist_ok avoids the race between checking for and creating the
    # directory.
    os.makedirs(str(output_container), exist_ok=True)

    # Loop-invariant shapes (in voxels) of the output datasets and chunks.
    spatial_shape = tuple(int(s) for s in output_dataset_roi.get_shape())
    chunk_shape = tuple(int(n) for n in network_output_shape)

    with z5py.File(str(output_container), use_zarr_format=False) as f:
        for output_dataset, dtype, num_channels, tensor in outputs:
            # num_channels <= 0 means the dataset has no channel axis.
            if num_channels > 0:
                shape = (num_channels, ) + spatial_shape
                chunks = (1, ) + chunk_shape
            else:
                shape = spatial_shape
                chunks = chunk_shape

            ds = f.require_dataset(
                name=output_dataset,
                shape=shape,
                dtype=dtype,
                chunks=chunks,
                # Honor --output-compression instead of always using 'raw',
                # consistent with what the pipeline factory is told.
                compression=args.output_compression)
            # Attributes are stored in reversed axis order.
            ds.attrs['resolution'] = tuple(args.output_voxel_size[::-1])
            ds.attrs['offset'] = tuple(
                output_dataset_roi_world.get_begin()[::-1])
            workflow_info = {
                'input': {
                    'container': input_container,
                    'dataset': inputs[0][0],
                    'tensor': inputs[0][1]
                },
                'output': {
                    'tensor': tensor
                },
                'network': {
                    'experiment_directory': experiment_directory,
                    'weight_graph': weight_graph,
                    'meta_graph': meta_graph,
                    'iteration': iteration
                }
            }
            ds.attrs['workflow_info'] = workflow_info

    # One daisy worker per GPU.
    gpus = args.gpus
    num_workers = len(gpus)

    pipeline_factory = _default_pipeline_factory(
        input_container=input_container,
        input=inputs[0],
        output_filename=str(output_container.name),
        output_dir=str(output_dir),
        outputs=tuple((ds, tensor) for ds, _, _, tensor in outputs),
        output_compression_type=args.output_compression,
        weight_graph=weight_graph,
        meta_graph=meta_graph,
        input_voxel_size=input_voxel_size,
        output_voxel_size=output_voxel_size)

    process_function = make_process_function(
        actor_id_to_gpu_mapping=lambda id: gpus[id],
        pipeline_factory=pipeline_factory,
        input_voxel_size=input_voxel_size,
        output_voxel_size=output_voxel_size,
        outputs=tuple((ds, tensor) for ds, _, _, tensor in outputs),
        num_cpu_workers=args.num_workers)

    # Half of the input/output size difference is the context on each side.
    total_roi = output_dataset_roi_world.grow(
        amount_neg=tuple(shape_diff_world / 2),
        amount_pos=tuple(shape_diff_world / 2))
    read_roi = Roi(
        shape=tuple(network_input_shape_world),
        offset=tuple(-shape_diff_world / 2))
    write_roi = Roi(
        shape=tuple(network_output_shape_world),
        offset=tuple(np.array((0, ) * len(input_voxel_size))))

    _logger.info('Running blockwise!')
    _logger.info('total roi:   %s', total_roi)
    _logger.info('read  roi:   %s', read_roi)
    _logger.info('write roi:   %s', write_roi)

    daisy.run_blockwise(
        total_roi=total_roi,
        read_roi=read_roi,
        write_roi=write_roi,
        process_function=process_function,
        num_workers=num_workers,
        read_write_conflict=False)
def parallel_watershed(
        affs,
        rag_provider,
        block_size,
        context,
        fragments_out,
        num_workers,
        mask=None,
        fragments_in_xy=False,
        epsilon_agglomerate=0.0,
        filter_fragments=0.0,
        replace_sections=None):
    '''Extract fragments from affinities using watershed.

    Args:

        affs (`class:daisy.Array`):

            An array containing affinities.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to write nodes for extracted fragments to. This does
            not yet add adjacency edges, for that, an agglomeration method
            should be called after this function.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel in world units.

        context (``tuple`` of ``int``):

            The context to consider for fragment extraction, in world units.
            ``None`` means no context.

        fragments_out (`class:daisy.Array`):

            An array to store fragments in. Should be of ``dtype`` ``uint64``.

        num_workers (``int``):

            The number of parallel workers.

        mask (`class:daisy.Array`):

            A dataset containing a mask. If given, fragments are only extracted
            for masked-in (==1) areas.

        fragments_in_xy (``bool``):

            Whether to extract fragments for each xy-section separately.

        epsilon_agglomerate (``float``):

            Perform an initial waterz agglomeration on the extracted fragments
            to this threshold. Skip if 0 (default).

        filter_fragments (``float``):

            Filter fragments that have an average affinity lower than this
            value.

        replace_sections (``list`` of ``int``):

            Replace fragments data with zero in given sections (useful if large
            artifacts are causing issues). List of section numbers (in voxels).
            Currently accepted but not forwarded to ``watershed_in_block``.

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments_out.data.dtype == np.uint64

    if context is None:
        context = daisy.Coordinate((0,)*affs.roi.dims())
    else:
        context = daisy.Coordinate(context)

    total_roi = affs.roi.grow(context, context)
    read_roi = daisy.Roi((0,)*affs.roi.dims(), block_size).grow(context, context)
    write_roi = daisy.Roi((0,)*affs.roi.dims(), block_size)

    num_voxels_in_block = (write_roi/affs.voxel_size).size()

    # NOTE(review): the value passed below used to be the undefined name
    # `filtered_fragments`, which raised a NameError for every block. The
    # keyword name is left as it was; confirm it matches the signature of
    # `watershed_in_block`.
    # NOTE(review): `replace_sections` is not passed on; forward it once
    # confirmed that `watershed_in_block` accepts it.
    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: watershed_in_block(
            affs=affs,
            block=b,
            context=context,
            rag_provider=rag_provider,
            fragments_out=fragments_out,
            num_voxels_in_block=num_voxels_in_block,
            fragments_in_xy=fragments_in_xy,
            epsilon_agglomerate=epsilon_agglomerate,
            mask=mask,
            filtered_fragments=filter_fragments),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
parser.add_argument('--num_workers', '-nw', type=int, help="Number of processes to spawn", default=1) parser.add_argument('--read_write_conflict', '-rwc', action='store_true', help="Flag to not schedule overlapping blocks" " at the same time. Default is false") args = parser.parse_args() ndims = len(args.total_roi_size) # define total region of interest (roi) total_roi_start = daisy.Coordinate((0,)*ndims) total_roi_size = daisy.Coordinate(args.total_roi_size) total_roi = daisy.Roi(total_roi_start, total_roi_size) # define block read and write rois block_read_size = daisy.Coordinate(args.block_read_size) block_write_size = daisy.Coordinate(args.block_write_size) context = (block_read_size - block_write_size) / 2 block_read_roi = daisy.Roi(total_roi_start, block_read_size) block_write_roi = daisy.Roi(context, block_write_size) # call run_blockwise daisy.run_blockwise( total_roi, block_read_roi, block_write_roi, process_function=process_function, read_write_conflict=args.read_write_conflict, num_workers=args.num_workers)
def predict_blockwise(base_dir, experiment, train_number, predict_number,
                      iteration, in_container_spec, in_container, in_dataset,
                      in_offset, in_size, out_container, db_name, db_host,
                      singularity_container, num_cpus, num_cache_workers,
                      num_block_workers, queue, mount_dirs, **kwargs):
    '''Run prediction in parallel blocks. Within blocks, predict in chunks.

    The block read/write sizes are the network input/output shapes from
    ``predict_net.json`` in the predict setup directory, multiplied by the
    voxel size of the source dataset. One output dataset
    ``volumes/<output_name>`` is prepared in ``out_container`` for every
    entry of the ``outputs`` dictionary of that config.

    Args:

        base_dir (``string``), experiment (``string``):

            Joined to form the experiment directory that contains the
            ``01_train`` and ``02_predict`` setup directories.

        train_number, predict_number:

            Used to build the setup directory names
            (``01_train/setup_t{train_number}`` and
            ``02_predict/setup_t{train_number}_p{predict_number}``) and passed
            on to the worker.

        iteration (``int``):

            Training iteration to predict from; passed on to the worker.

        in_container_spec (``string``), in_dataset (``string``):

            Opened with ``daisy.open_ds`` to obtain the ROI and voxel size of
            the source.

        in_container (``string``):

            Input container, passed on to the worker.

        in_offset, in_size:

            Accepted but not used by this function.

        out_container (``string``):

            Container in which the output datasets are prepared.

        db_name (``string``), db_host (``string``):

            MongoDB database and host. The collection ``blocks_predicted`` is
            used by the check function.

        singularity_container, num_cpus, num_cache_workers, mount_dirs:

            Passed on to the worker unchanged.

        num_block_workers (``int``):

            How many blocks to run in parallel.

        queue (``string``):

            Name of queue to run inference on (i.e slowpoke, gpu_rtx, gpu_any,
            gpu_tesla, gpu_tesla_large)

    Raises:

        RuntimeError: If prediction failed for at least one block.
    '''

    experiment_dir = os.path.join(base_dir, experiment)
    predict_setup_dir = os.path.join(
        experiment_dir,
        "02_predict/setup_t{}_p{}".format(train_number, predict_number))
    train_setup_dir = os.path.join(
        experiment_dir,
        "01_train/setup_t{}".format(train_number))

    # from here on, all values are in world units (unless explicitly mentioned)

    # get ROI of source
    source = daisy.open_ds(in_container_spec, in_dataset)
    logger.info('Source dataset has shape %s, ROI %s, voxel size %s' %
                (source.shape, source.roi, source.voxel_size))

    # Read network config
    predict_net_config = os.path.join(predict_setup_dir, 'predict_net.json')
    with open(predict_net_config) as f:
        logger.info('Reading setup config from {}'.format(predict_net_config))
        net_config = json.load(f)
    outputs = net_config['outputs']

    # get chunk size and context
    net_input_size = daisy.Coordinate(
        net_config['input_shape']) * source.voxel_size
    net_output_size = daisy.Coordinate(
        net_config['output_shape']) * source.voxel_size
    context = (net_input_size - net_output_size) / 2
    logger.info('Network context: {}'.format(context))

    # get total input and output ROIs
    input_roi = source.roi.grow(context, context)
    output_roi = source.roi

    # create read and write ROI
    block_read_roi = daisy.Roi((0, 0, 0), net_input_size) - context
    block_write_roi = daisy.Roi((0, 0, 0), net_output_size)

    logger.info('Preparing output dataset...')

    for output_name, val in outputs.items():
        out_dims = val['out_dims']
        out_dtype = val['out_dtype']
        out_dataset = 'volumes/%s' % output_name

        # Only called to create the dataset; the returned array is not
        # needed here.
        daisy.prepare_ds(
            out_container,
            out_dataset,
            output_roi,
            source.voxel_size,
            out_dtype,
            write_roi=block_write_roi,
            num_channels=out_dims,
            compressor={
                'id': 'gzip',
                'level': 5
            })

    logger.info('Starting block-wise processing...')

    client = pymongo.MongoClient(db_host)
    db = client[db_name]

    # Create the block_id index only when the collection is new.
    collection_is_new = (
        'blocks_predicted' not in db.list_collection_names())
    blocks_predicted = db['blocks_predicted']
    if collection_is_new:
        blocks_predicted.create_index([('block_id', pymongo.ASCENDING)],
                                      name='block_id')

    # process block-wise
    succeeded = daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda: predict_worker(
            train_setup_dir,
            predict_setup_dir,
            predict_number,
            train_number,
            experiment,
            iteration,
            in_container,
            in_dataset,
            out_container,
            db_host,
            db_name,
            queue,
            singularity_container,
            num_cpus,
            num_cache_workers,
            mount_dirs),
        check_function=lambda b: check_block(blocks_predicted, b),
        num_workers=num_block_workers,
        read_write_conflict=False,
        fit='overhang')

    if not succeeded:
        raise RuntimeError("Prediction failed for (at least) one block")
logger.info(f'Processing data to {config.out_file}/{config.out_ds_name}') output_dataset = daisy.prepare_ds( config.out_file, config.out_ds_name, total_roi=output_roi, voxel_size=dataset.voxel_size, dtype=dataset.dtype, write_size=block_write_roi.get_shape()) # make task task = daisy.Task( 'GaussianSmoothingTask', total_roi, block_read_roi, block_write_roi, process_function=lambda b: smooth( b, dataset, output_dataset, sigma=config.sigma), read_write_conflict=False, num_workers=config.num_workers, fit='shrink' ) # run task ret = daisy.run_blockwise([task]) if ret: logger.info("Ran all blocks successfully!") else: logger.info("Did not run all blocks successfully...")
def parallel_aff_agglomerate(
        affs,
        fragments,
        rag_provider,
        block_size,
        context,
        merge_function,
        threshold,
        num_workers):
    '''Agglomerate fragments in parallel using ``waterz``.

    Args:

        affs (`class:daisy.Array`):

            An array containing affinities.

        fragments (`class:daisy.Array`):

            An array containing fragments. Must be of ``dtype`` ``uint64``.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to read nodes from and write found edges to.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel, in world units.

        context (``tuple`` of ``int``):

            The context to consider for agglomeration, in world units.

        merge_function (``string``):

            The merge function to use for ``waterz``.

        threshold (``float``):

            Until which threshold to agglomerate.

        num_workers (``int``):

            The number of parallel workers.

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments.data.dtype == np.uint64

    context = daisy.Coordinate(context)
    origin = (0,)*affs.roi.dims()

    # The total ROI and each block's read ROI include the context; only the
    # un-grown block is written.
    total_roi = affs.roi.grow(context, context)
    write_roi = daisy.Roi(origin, block_size)
    read_roi = daisy.Roi(origin, block_size).grow(context, context)

    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: agglomerate_in_block(
            affs,
            fragments,
            rag_provider,
            b,
            merge_function,
            threshold),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
def extract_fragments(experiment, setup, iteration, affs_file, affs_dataset,
                      fragments_file, fragments_dataset, block_size, context,
                      db_host, db_name, num_workers, fragments_in_xy, queue,
                      epsilon_agglomerate=0, mask_file=None,
                      mask_dataset=None, filter_fragments=0,
                      replace_sections=None, **kwargs):
    '''

    Extract fragments in parallel blocks. Requires that affinities have been
    predicted before.

    When running parallel inference, the worker files are located in the setup
    directory of each experiment since that is where the training was done and
    checkpoints are located. When running watershed (and agglomeration) in
    parallel, we call a worker file which can be located anywhere. By default,
    we assume there is a workers directory inside the current directory that
    contains worker scripts (e.g `workers/extract_fragments_worker.py`).

    Args:

        * following three params just used to build out file directory *

        experiment (``string``):

            Name of the experiment (fib25, hemi, zfinch, ...).

        setup (``string``):

            Name of the setup to predict (setup01, setup02, ...).

        iteration (``int``):

            Training iteration.

        affs_file (``string``):

            Path to file (zarr/n5) where predictions are stored.

        affs_dataset (``string``):

            Predictions dataset to use (e.g 'volumes/affs'). If the dataset
            cannot be opened (e.g. because it is a scale pyramid), scale zero
            is tried instead, assuming it is stored in directory `s0` (e.g
            'volumes/affs/s0').

        fragments_file (``string``):

            Path to file (zarr/n5) to store fragments (supervoxels) - generally
            a good idea to store in the same place as affs.

        fragments_dataset (``string``):

            Name of dataset to write fragments (supervoxels) to (e.g
            'volumes/fragments').

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of voxel
            size).

        context (``tuple`` of ``int``):

            The context to consider for fragment extraction in world units.

        db_host (``string``):

            Name of MongoDB client.

        db_name (``string``):

            Name of MongoDB database to use (for logging successful blocks in
            check function and writing nodes to the region adjacency graph).

        num_workers (``int``):

            How many blocks to run in parallel.

        fragments_in_xy (``bool``):

            Whether to extract fragments for each xy-section separately.

        queue (``string``):

            Name of cpu queue to use (e.g local)

        epsilon_agglomerate (``float``, optional):

            Perform an initial waterz agglomeration on the extracted fragments
            to this threshold. Skip if 0 (default).

        mask_file (``string``, optional):

            Path to file (zarr/n5) containing mask.

        mask_dataset (``string``, optional):

            Name of mask dataset. Data should be uint8 where 1 == masked in, 0
            == masked out.

        filter_fragments (``float``, optional):

            Filter fragments that have an average affinity lower than this
            value.

        replace_sections (``list`` of ``int``, optional):

            Replace fragments data with zero in given sections (useful if large
            artifacts are causing issues). List of section numbers (in voxels).

    '''

    logging.info(f"Reading affs from {affs_file}")

    try:
        affs = daisy.open_ds(affs_file, affs_dataset)
    except Exception:
        # Fall back to scale zero of a scale pyramid. The result has to be
        # bound to `affs` (it used to be bound to an unused name, leaving
        # `affs` undefined below). The updated dataset name is also what the
        # workers receive.
        affs_dataset = affs_dataset + '/s0'
        affs = daisy.open_ds(affs_file, affs_dataset)

    network_dir = os.path.join(experiment, setup, str(iteration))

    client = pymongo.MongoClient(db_host)
    db = client[db_name]

    if 'blocks_extracted' not in db.list_collection_names():
        blocks_extracted = db['blocks_extracted']
        blocks_extracted.create_index([('block_id', pymongo.ASCENDING)],
                                      name='block_id')
    else:
        blocks_extracted = db['blocks_extracted']

    # prepare fragments dataset. By default use same roi as affinities, change
    # roi if extracting fragments in cropped region
    fragments = daisy.prepare_ds(
        fragments_file,
        fragments_dataset,
        affs.roi,
        affs.voxel_size,
        np.uint64,
        daisy.Roi((0, 0, 0), block_size),
        compressor={
            'id': 'zlib',
            'level': 5
        })

    context = daisy.Coordinate(context)
    total_roi = affs.roi.grow(context, context)

    read_roi = daisy.Roi((0, ) * affs.roi.dims(),
                         block_size).grow(context, context)
    write_roi = daisy.Roi((0, ) * affs.roi.dims(), block_size)

    #get number of voxels in block
    num_voxels_in_block = (write_roi / affs.voxel_size).size()

    #blockwise watershed
    daisy.run_blockwise(
        total_roi=total_roi,
        read_roi=read_roi,
        write_roi=write_roi,
        process_function=lambda: start_worker(
            affs_file,
            affs_dataset,
            fragments_file,
            fragments_dataset,
            db_host,
            db_name,
            context,
            fragments_in_xy,
            queue,
            network_dir,
            epsilon_agglomerate,
            mask_file,
            mask_dataset,
            filter_fragments,
            replace_sections,
            num_voxels_in_block),
        check_function=lambda b: check_block(blocks_extracted, b),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
def agglomerate(experiment, setup, iteration, affs_file, affs_dataset,
                fragments_file, fragments_dataset, block_size, context,
                db_host, db_name, num_workers, queue, merge_function,
                **kwargs):
    '''
    Run agglomeration block by block over the affinities ROI.

    Affinities and fragments (supervoxels) must already exist. Blocks are
    dispatched through ``daisy.run_blockwise``; each worker is spawned via
    ``start_worker`` and finished blocks are looked up with ``check_block``
    in the MongoDB collection ``blocks_agglomerated_<merge_function>``.

    Args:

        experiment (``string``):

            Name of the experiment (fib25, hemi, zfinch, ...). Only used,
            together with ``setup``, ``iteration`` and ``merge_function``,
            to build the directory path handed to the worker.

        setup (``string``):

            Name of the setup to predict (setup01, setup02, ...).

        iteration (``int``):

            Training iteration.

        affs_file (``string``):

            Path to file (zarr/n5) where predictions are stored.

        affs_dataset (``string``):

            Predictions dataset to use (e.g 'volumes/affs').

        fragments_file (``string``):

            Path to file (zarr/n5) where fragments (supervoxels) are
            stored.

        fragments_dataset (``string``):

            Name of fragments (supervoxels) dataset
            (e.g 'volumes/fragments').

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of
            voxel size).

        context (``tuple`` of ``int``):

            The context, in world units, by which the total ROI and each
            block's read ROI are grown on both sides.

        db_host (``string``):

            Name of MongoDB client.

        db_name (``string``):

            Name of MongoDB database to use (for logging successful blocks
            in check function and reading nodes from + writing edges to
            the region adjacency graph).

        num_workers (``int``):

            How many blocks to run in parallel.

        queue (``string``):

            Forwarded unchanged to ``start_worker``.

        merge_function (``string``):

            Symbolic name of a merge function. See dictionary in worker
            script (workers/agglomerate_worker.py).
    '''

    logging.info(f"Reading affs from {affs_file}")
    affs = daisy.open_ds(affs_file, affs_dataset, mode='r')

    logging.info(f"Reading fragments from {fragments_file}")
    # The returned dataset is not used further in this function.
    daisy.open_ds(fragments_file, fragments_dataset, mode='r')

    network_dir = os.path.join(
        experiment, setup, str(iteration), merge_function)

    database = pymongo.MongoClient(db_host)[db_name]
    collection_name = 'blocks_agglomerated_' + merge_function

    # The index is only created when the collection did not exist before.
    is_new_collection = collection_name not in database.list_collection_names()
    done_blocks = database[collection_name]
    if is_new_collection:
        done_blocks.create_index(
            [('block_id', pymongo.ASCENDING)],
            name='block_id')

    context = daisy.Coordinate(context)
    origin = (0, ) * affs.roi.dims()

    write_roi = daisy.Roi(origin, block_size)
    read_roi = write_roi.grow(context, context)
    total_roi = affs.roi.grow(context, context)

    def spawn_worker():
        return start_worker(
            affs_file,
            affs_dataset,
            fragments_file,
            fragments_dataset,
            db_host,
            db_name,
            queue,
            merge_function,
            network_dir)

    def block_is_done(block):
        return check_block(done_blocks, block)

    daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        process_function=spawn_worker,
        check_function=block_is_done,
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
# Count fragment/ground-truth overlaps block by block into a scratch
# directory, reduce the per-block results, and pickle the final mapping.
# Expects `config`, `fragments` and `groundtruth` to be defined beforehand.
output_path = '../temp/overlap_counts'

# Start from an empty scratch directory so stale per-block results from a
# previous run cannot leak into the reduction step.
if os.path.isdir(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

# TODO parametrize block size
block_size = config['block_size']
total_roi = daisy.Roi(offset=config['roi_offset'], shape=config['roi_shape'])

logger.info('Start blockwise processing')
start = time.time()
daisy.run_blockwise(
    total_roi=total_roi,
    read_roi=daisy.Roi(offset=(0, 0, 0), shape=block_size),
    write_roi=daisy.Roi(offset=(0, 0, 0), shape=block_size),
    process_function=lambda block: overlap_in_block(
        block=block,
        fragments=fragments,
        groundtruth=groundtruth,
        tmp_path=output_path),
    fit='shrink',
    num_workers=config['num_workers'],
    read_write_conflict=False,
    max_retries=0)

# TODO parametrize
logger.debug('num blocks: {}'.format(
    np.prod(np.ceil(np.array(config['roi_shape']) / np.array(block_size)))))

frag_to_gt = overlap_reduce(output_path)

# Use a context manager so the pickle file is flushed and closed
# deterministically instead of relying on garbage collection.
with open('frag_to_gt.pickle', 'wb') as pickle_file:
    pickle.dump(frag_to_gt, pickle_file)
def predict_blockwise(config_file, iteration):
    '''
    Run block-wise prediction for the sample described in ``config_file``.

    The 'general' and 'predict' sections of the loaded config are layered
    over a few built-in defaults. The prediction ROI is the source ROI of
    the sample, optionally restricted by ``frames`` and/or
    ``limit_to_roi_offset``/``limit_to_roi_shape``, snapped (growing) to a
    multiple of the network output size. If ``output_zarr`` is configured,
    the two output datasets are prepared first. If ``db_name`` is
    configured, finished blocks are looked up through ``check_function``.

    Args:

        config_file:

            Passed to ``load_config`` and forwarded to ``predict_worker``.

        iteration:

            Forwarded unchanged to ``predict_worker``.
    '''
    config = {
        "solve_context": daisy.Coordinate((2, 100, 100, 100)),
        "num_workers": 16,
        "data_dir": '../01_data',
        "setups_dir": '../02_setups',
    }
    master_config = load_config(config_file)
    for section in ('general', 'predict'):
        config.update(master_config[section])

    setup_dir = os.path.abspath(
        os.path.join(config['setups_dir'], config['setup']))

    voxel_size, source_roi = get_source_roi(
        config['data_dir'], config['sample'])
    predict_roi = source_roi

    # Given frames and rois are the prediction region,
    # not the solution region.
    if 'frames' in config:
        frames = config['frames']
        logger.info("Limiting prediction to frames %s" % str(frames))
        begin, end = frames
        frames_roi = daisy.Roi(
            (begin, None, None, None),
            (end - begin, None, None, None))
        predict_roi = predict_roi.intersect(frames_roi)

    if 'limit_to_roi_offset' in config:
        assert 'limit_to_roi_shape' in config,\
            "Must specify shape and offset in config file"
        limit_to_roi = daisy.Roi(
            daisy.Coordinate(config['limit_to_roi_offset']),
            daisy.Coordinate(config['limit_to_roi_shape']))
        predict_roi = predict_roi.intersect(limit_to_roi)

    # Network input/output sizes in world units; the context is the
    # per-side difference between them.
    with open(os.path.join(setup_dir, 'test_net_config.json'), 'r') as f:
        net_config = json.load(f)
    net_input_size = daisy.Coordinate(net_config['input_shape']) * voxel_size
    net_output_size = (
        daisy.Coordinate(net_config['output_shape_2']) * voxel_size)
    context = (net_input_size - net_output_size) / 2

    # expand predict roi to multiple of block write_roi
    predict_roi = predict_roi.snap_to_grid(net_output_size, mode='grow')
    output_roi = predict_roi
    input_roi = predict_roi.grow(context, context)

    # prepare output zarr, if necessary
    if 'output_zarr' in config:
        output_path = os.path.join(setup_dir, config['output_zarr'])
        logger.debug("Preparing zarr at %s" % output_path)
        output_datasets = (
            ('volumes/parent_vectors', 3),
            ('volumes/cell_indicator', 1),
        )
        for ds_name, channels in output_datasets:
            daisy.prepare_ds(
                output_path,
                ds_name,
                output_roi,
                voxel_size,
                dtype=np.float32,
                write_size=net_output_size,
                num_channels=channels)

    # create read and write ROI
    block_write_roi = daisy.Roi((0, 0, 0, 0), net_output_size)
    block_read_roi = block_write_roi.grow(context, context)

    logger.info("Following ROIs in world units:")
    logger.info("Input ROI = %s" % input_roi)
    logger.info("Block read ROI = %s" % block_read_roi)
    logger.info("Block write ROI = %s" % block_write_roi)
    logger.info("Output ROI = %s" % output_roi)

    logger.info("Starting block-wise processing...")

    # process block-wise; a check function is only installed when a
    # database is configured
    blockwise_kwargs = {
        'process_function': lambda: predict_worker(config_file, iteration),
        'num_workers': config['num_workers'],
        'read_write_conflict': False,
        'max_retries': 0,
        'fit': 'valid',
    }
    if 'db_name' in config:
        def block_is_done(block):
            # Config lookups stay inside the callback so they happen at
            # check time.
            return check_function(
                block, 'predict', config['db_name'], config['db_host'])

        blockwise_kwargs['check_function'] = block_is_done

    daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        **blockwise_kwargs)
def predict_blockwise(
        train_dir,
        iteration,
        in_container,
        input_roi_in_pixels,
        out_container,
        # output_roi_in_pixels,
        num_workers,
        client,
        block_size_in_chunks=(1, 1, 1),
        raw_dataset='volumes/raw',
        affs_dataset='volumes/prediction/affinities',
        net_io_names_json='net_io_names.json',
        unet_inference_meta='unet_inference.meta'):
    '''
    Predict affinities block by block, running each block in a docker
    container through ``daisy.call``.

    Args:

        train_dir:

            Directory containing ``net_io_names_json``.

        iteration:

            Currently unused in this function.

        in_container:

            Container holding ``raw_dataset``; opened with
            ``daisy.open_ds``.

        input_roi_in_pixels:

            ROI to predict, in input voxels. It is scaled by the input
            voxel size and grown by the network context.

        out_container:

            Container holding ``affs_dataset``; read by the block check.

        num_workers:

            Passed to ``daisy.run_blockwise``.

        client:

            Passed to ``daisy.run_blockwise``.

        block_size_in_chunks:

            Multiplier applied to the network output chunk size to obtain
            the block output size.

        raw_dataset, affs_dataset:

            Dataset names inside the input and output containers.

        net_io_names_json:

            File name, relative to ``train_dir``, of the JSON config.

        unet_inference_meta:

            Currently unused in this function.
    '''

    # TODO: change to predict graph
    # NOTE(review): the loaded config, the array keys and the opened raw
    # dataset are not used further below; they are kept so that missing
    # inputs still fail here rather than inside a worker.
    with open(os.path.join(train_dir, net_io_names_json), 'r') as f:
        config = json.load(f)

    raw = ArrayKey(_raw_key)
    affs = ArrayKey(_affs_key)

    raw_source = daisy.open_ds(in_container, raw_dataset)

    # Alternative voxel sizes that were tried previously:
    #   input (360, 36, 36), output (120, 108, 108)
    input_voxel_size = Coordinate((120, 12, 12)) * 3
    output_voxel_size = Coordinate((40, 36, 36)) * 3
    input_shape = (91, 862, 862)
    output_shape = (209, 214, 214)
    net_input_chunk_size, net_output_chunk_size, context = get_chunk_sizes(
        input_shape, output_shape, input_voxel_size, output_voxel_size)

    # compute sizes of blocks
    block_output_size = net_output_chunk_size * block_size_in_chunks
    block_input_size = block_output_size + context + context

    input_roi = (input_roi_in_pixels * input_voxel_size).grow(
        context, context)

    # The block input ROI is shifted so that the block output ROI sits
    # `context` inside of it.
    block_input_roi = Roi((0, 0, 0), block_input_size) - context
    block_output_roi = Roi((0, 0, 0), block_output_size)

    _logger.debug('input_roi_in_pixels %s', input_roi_in_pixels)
    _logger.debug('input_voxel_size %s', input_voxel_size)
    _logger.debug('output_voxel_size %s', output_voxel_size)
    _logger.debug('input shape %s', input_shape)
    _logger.debug('output shape %s', output_shape)
    _logger.debug('block_input_size %s', block_input_size)
    _logger.debug('block_output_size %s', block_output_size)
    _logger.debug('block_input_roi %s', block_input_roi)
    _logger.debug('block_output_roi %s', block_output_roi)
    _logger.debug('input_roi %s', input_roi)

    cwd = os.getcwd()

    def predict_in_block(block):
        '''Run the prediction script for one block inside docker.'''

        from distributed import get_worker

        write_roi = block.write_roi
        predict_script = '/groups/saalfeld/home/hanslovskyp/experiments/quasi-isotropic/predict/predict.py'
        cuda_visible_devices = get_worker().cuda_visible_devices
        # Fixed: this used to be assigned as `predict_scripts_args` while
        # the command below read `predict_script_args` (NameError).
        predict_script_args = ''

        name = 'predict-%s-%s' % (write_roi.get_begin(), write_roi.get_size())
        log_file = os.path.join(cwd, '%s.log' % name)
        pythonpath = ':'.join([
            '%s/workspace-pycharm/u-net/gunpowder' % _HOME,
            '%s/workspace-pycharm/u-net/CNNectome' % _HOME,
            '/groups/saalfeld/home/papec/Work/my_projects/z5/bld/python'
        ])
        pythonpath_export_str = (
            'export PYTHONPATH=%s:$PYTHONPATH' % pythonpath)

        # NOTE(review): `2>&1 > file` sends stderr to the original stdout
        # and only stdout to the log file; if both were meant to go to the
        # log, this should be `> file 2>&1`. Left unchanged; confirm.
        shell_command = (
            '"export CUDA_VISIBLE_DEVICES=%s; %s; python -u %s %s 2>&1 > %s"'
            % (cuda_visible_devices,
               pythonpath_export_str,
               predict_script,
               predict_script_args,
               log_file))

        daisy.call([
            'nvidia-docker', 'run', '--rm',
            # Fixed: command elements must be strings, not an int uid.
            '-u', str(os.getuid()),
            '-v', '/groups/turaga:/groups/turaga:rshared',
            '-v', '/groups/saalfeld:/groups/saalfeld:rshared',
            '-v', '/nrs/saalfeld:/nrs/saalfeld:rshared',
            '-w', cwd,
            '--name', name,
            # Fixed: a missing comma here used to glue the image name and
            # '/bin/bash' into a single argument.
            'neptunes5thmoon/gunpowder:v0.3-pre6-dask1',
            '/bin/bash', '-c', shell_command,
        ])

    def check_block(block):
        '''Treat a block as done when its center value(s) sum to non-zero.'''

        _logger.debug("Checking if block %s is complete...", block.write_roi)

        ds = daisy.open_ds(out_container, affs_dataset)
        center_values = ds[block.write_roi.get_center()]
        s = np.sum(center_values)

        _logger.debug("Sum of center values in %s is %f", block.write_roi, s)

        return s != 0

    # TODO set client
    daisy.run_blockwise(
        input_roi,
        block_input_roi,
        block_output_roi,
        process_function=predict_in_block,
        check_function=check_block,
        num_workers=num_workers,
        processes=False,
        read_write_conflict=False,
        client=client)
help='The output container, defaults to be the same as in_file+.zarr' ) ap.add_argument( "--out_ds_name", type=str, default=None, help='The name of the dataset, defaults to be in_ds_name' ) ap.add_argument( "--chunk_shape_voxel", type=int, help='The size of a chunk in voxels', nargs='+', default=None ) ap.add_argument( "--max_voxel_count", type=int, default=256*1024, help='If chunk_shape_voxel is not given, use this value to calculate' 'a near isotropic chunk shape', ) ap.add_argument( "--roi_offset", type=int, help='', nargs='+', default=None) ap.add_argument( "--roi_shape", type=int, help='', nargs='+', default=None) config = HDF2ZarrTask.parse_args(ap) task = HDF2ZarrTask(config) daisy_task = task.prepare_task() done = daisy.run_blockwise([daisy_task]) if done: logger.info("Ran all blocks successfully!") else: logger.info("Did not run all blocks successfully...")
def extract_segmentation(fragments_file, fragments_dataset, edges_collection,
                         threshold, block_size, out_file, out_dataset,
                         num_workers, roi_offset=None, roi_shape=None,
                         run_type=None, **kwargs):
    '''
    Relabel fragments to segments block by block using a stored
    fragment-segment lookup table (LUT).

    The LUT is read from
    ``<fragments_file>/luts/fragment_segment[/<run_type>]/``
    ``seg_<edges_collection>_<int(threshold*100)>.npz``.

    Args:

        fragments_file (``string``):

            Path to file (zarr/n5) containing fragments (supervoxels).

        fragments_dataset (``string``):

            Name of fragments dataset (e.g `volumes/fragments`)

        edges_collection (``string``):

            The name of the MongoDB database edges collection to use.
            Here it only contributes to the LUT file name.

        threshold (``float``):

            The threshold to use for generating a segmentation. Here it
            only contributes to the LUT file name.

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of
            voxel size).

        out_file (``string``):

            Path to file (zarr/n5) to write segmentation to.

        out_dataset (``string``):

            Name of segmentation dataset (e.g `volumes/segmentation`).

        num_workers (``int``):

            How many workers to use for the blockwise relabelling.

        roi_offset (array-like of ``int``, optional):

            The starting point (inclusive) of the ROI. Entries can be
            ``None`` to indicate unboundedness.

        roi_shape (array-like of ``int``, optional):

            The shape of the ROI. Entries can be ``None`` to indicate
            unboundedness. Required whenever ``roi_offset`` is given.

        run_type (``string``, optional):

            Can be used to direct luts into directory (e.g testing,
            validation, etc).
    '''

    # open fragments
    fragments = daisy.open_ds(fragments_file, fragments_dataset)

    # Default to the full fragments ROI unless an explicit one was given.
    if roi_offset is None:
        total_roi = fragments.roi
    else:
        assert roi_shape is not None, \
            "If roi_offset is set, roi_shape also needs to be provided"
        total_roi = daisy.Roi(offset=roi_offset, shape=roi_shape)

    # Read and write ROI are the same block.
    block_roi = daisy.Roi((0, ) * 3, daisy.Coordinate(block_size))

    logging.info("Preparing segmentation dataset...")
    segmentation = daisy.prepare_ds(
        out_file,
        out_dataset,
        total_roi,
        voxel_size=fragments.voxel_size,
        dtype=np.uint64,
        write_roi=block_roi)

    lut_filename = f'seg_{edges_collection}_{int(threshold*100)}'

    lut_path_parts = [fragments_file, 'luts', 'fragment_segment']
    if run_type:
        lut_path_parts.append(run_type)
        logging.info(f"Run type set, using luts from {run_type} data")

    lut = os.path.join(*lut_path_parts, lut_filename + '.npz')
    assert os.path.exists(lut), f"{lut} does not exist"

    logging.info("Reading fragment-segment LUT...")
    lut = np.load(lut)['fragment_segment_lut']
    logging.info(f"Found {len(lut[0])} fragments in LUT")

    num_segments = len(np.unique(lut[1]))
    logging.info(f"Relabelling fragments to {num_segments} segments")

    def relabel_block(block):
        return segment_in_block(
            block, fragments_file, segmentation, fragments, lut)

    daisy.run_blockwise(
        total_roi,
        block_roi,
        block_roi,
        relabel_block,
        fit='shrink',
        num_workers=num_workers)
def solve_blockwise(
        db_host,
        db_name,
        sample,
        parameters,  # list of TrackingParameters
        num_workers=8,
        frames=None,
        limit_to_roi=None,
        from_scratch=False,
        data_dir='../01_data',
        cell_cycle_key=None,
        **kwargs):
    '''
    Solve block-wise for one or several parameter sets at once.

    Parameter sets that are already marked as done for all blocks are
    skipped. Note that ``parameters`` is modified in place: entries whose
    parameters id is already complete are removed from the list.

    Args:

        db_host, db_name:

            Passed to ``CandidateDatabase`` and to the done-block
            bookkeeping functions.

        sample, data_dir:

            Passed to ``get_source_roi`` to obtain the source ROI.

        parameters (``list`` of ``TrackingParameters``):

            All entries must share the same ``block_size`` and
            ``context``.

        num_workers (``int``):

            Passed to ``daisy.run_blockwise``.

        frames (pair of ``int``, optional):

            ``(begin, end)``; restricts the first ROI dimension.

        limit_to_roi (optional):

            Intersected with the source ROI if given.

        from_scratch (``bool``):

            If set, the selection is reset for every parameters id before
            solving.

        cell_cycle_key (optional):

            Forwarded to ``solve_in_block``.

    Returns:

        ``True`` if everything was already completed, otherwise the
        result of ``daisy.run_blockwise``.
    '''

    block_size = daisy.Coordinate(parameters[0].block_size)
    context = daisy.Coordinate(parameters[0].context)
    # block size and context must be the same for all parameters!
    for params in parameters:
        assert list(block_size) == params.block_size,\
            "%s not equal to %s" %\
            (block_size, params.block_size)
        assert list(context) == params.context

    _, source_roi = get_source_roi(data_dir, sample)

    # determine parameters id from database
    graph_provider = CandidateDatabase(
        db_name,
        db_host)
    parameters_id = [graph_provider.get_parameters_id(p) for p in parameters]

    if from_scratch:
        for pid in parameters_id:
            graph_provider.set_parameters_id(pid)
            graph_provider.reset_selection()

    # limit to specific frames, if given
    if frames:
        logger.info("Solving in frames %s" % frames)
        begin, end = frames
        crop_roi = daisy.Roi(
            (begin, None, None, None),
            (end - begin, None, None, None))
        source_roi = source_roi.intersect(crop_roi)
    # limit to roi, if given
    if limit_to_roi:
        logger.info("limiting to roi %s" % str(limit_to_roi))
        source_roi = source_roi.intersect(limit_to_roi)

    block_write_roi = daisy.Roi(
        (0, 0, 0, 0),
        block_size)
    block_read_roi = block_write_roi.grow(
        context,
        context)
    total_roi = source_roi.grow(
        context,
        context)

    logger.info("Solving in %s", total_roi)

    param_names = ['solve_' + str(_id) for _id in parameters_id]
    if len(parameters_id) > 1:
        # check if set of parameters is already done
        step_name = 'solve_' + str(hash(frozenset(parameters_id)))
        if check_function_all_blocks(step_name, db_name, db_host):
            logger.info("Param set with name %s already completed. Exiting",
                        step_name)
            return True
    else:
        step_name = 'solve_' + str(parameters_id[0])

    # Check each individual parameter to see if it is done
    # if it is, remove it from the list.
    # Positions are recorded directly via enumerate: looking them up with
    # list.index() would always hit the first occurrence of a repeated id
    # and delete the wrong entries.
    done_indices = []
    for index, (_id, name) in enumerate(zip(parameters_id, param_names)):
        if check_function_all_blocks(name, db_name, db_host):
            logger.info(
                "Params with id %d already completed. Removing", _id)
            done_indices.append(index)
    # Delete from the back so earlier indices stay valid.
    for index in reversed(done_indices):
        del parameters_id[index]
        del parameters[index]
        del param_names[index]
    logger.debug(parameters_id)
    if len(parameters_id) == 0:
        logger.info("All parameters in set already completed. Exiting")
        return True

    success = daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda b: solve_in_block(
            db_host,
            db_name,
            parameters,
            b,
            parameters_id,
            solution_roi=source_roi,
            cell_cycle_key=cell_cycle_key),
        # Note: in the case of a set of parameters,
        # we are assuming that none of the individual parameters are
        # half done and only checking the hash for each block
        check_function=lambda b: check_function(
            b,
            step_name,
            db_name,
            db_host),
        num_workers=num_workers,
        fit='overhang')
    if success:
        # write all done to individual parameters and set
        if len(param_names) > 1:
            write_done_all_blocks(
                step_name,
                db_name,
                db_host)
        for name in param_names:
            write_done_all_blocks(
                name,
                db_name,
                db_host)
    logger.info("Finished solving")
    return success
def extract_segmentation(fragments_file, fragments_dataset, edges_collection,
                         threshold, out_file, out_dataset, num_workers,
                         lut_fragment_segment, roi_offset=None,
                         roi_shape=None, run_type=None, **kwargs):
    '''
    Relabel fragments to segments block by block using a stored
    fragment-segment lookup table (LUT).

    Blocks have a fixed size of 5000 along each of the three axes. The LUT
    is read from
    ``<fragments_file>/<lut_fragment_segment>[/<run_type>]/``
    ``seg_<edges_collection>_<int(threshold * 100)>.npz`` (array
    ``fragment_segment_lut``).

    Args:

        fragments_file, fragments_dataset:

            Container and dataset of the fragments; opened with
            ``daisy.open_ds``.

        edges_collection, threshold:

            Only used here to build the LUT file name.

        out_file, out_dataset:

            Container and dataset the segmentation dataset is prepared in.

        num_workers:

            Passed to ``daisy.run_blockwise``.

        lut_fragment_segment:

            Directory, relative to ``fragments_file``, holding the LUTs.

        roi_offset, roi_shape (optional):

            Explicit ROI to process instead of the full fragments ROI.
            ``roi_shape`` is required whenever ``roi_offset`` is given.

        run_type (optional):

            Sub-directory of the LUT directory to read from.
    '''

    # open fragments
    fragments = daisy.open_ds(fragments_file, fragments_dataset)

    # Default to the full fragments ROI unless an explicit one was given.
    if roi_offset is None:
        total_roi = fragments.roi
    else:
        assert roi_shape is not None, \
            "If roi_offset is set, roi_shape also needs to be provided"
        total_roi = daisy.Roi(offset=roi_offset, shape=roi_shape)

    block_offset = (0, 0, 0)
    block_shape = (5000, 5000, 5000)
    read_roi = daisy.Roi(block_offset, block_shape)
    write_roi = daisy.Roi(block_offset, block_shape)

    logging.info("Preparing segmentation dataset...")
    segmentation = daisy.prepare_ds(
        out_file,
        out_dataset,
        total_roi,
        voxel_size=fragments.voxel_size,
        dtype=np.uint64,
        write_roi=write_roi)

    lut_filename = 'seg_%s_%d' % (edges_collection, int(threshold * 100))

    lut_path_parts = [fragments_file, lut_fragment_segment]
    if run_type:
        lut_path_parts.append(run_type)
        logging.info("Run type set, using luts from %s data" % run_type)

    lut = os.path.join(*lut_path_parts, lut_filename + '.npz')
    assert os.path.exists(lut), "%s does not exist" % lut

    # Log how long loading the LUT takes.
    start = time.time()
    logging.info("Reading fragment-segment LUT...")
    lut = np.load(lut)['fragment_segment_lut']
    logging.info("%.3fs" % (time.time() - start))

    logging.info("Found %d fragments in LUT" % len(lut[0]))

    def relabel_block(block):
        return segment_in_block(
            block, fragments_file, segmentation, fragments, lut)

    daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        relabel_block,
        fit='shrink',
        num_workers=num_workers,
        processes=True,
        read_write_conflict=False)
help="Size of block read region", default=[20, 200, 200]) ap.add_argument('--block_write_size', '-w', nargs='+', help="Size of block write region", default=[18, 180, 180]) config = GaussianSmoothingTask.parse_args(ap) config1 = copy.deepcopy(config) config1['out_ds_name'] = 'volumes/raw_smoothed' daisy_task1 = GaussianSmoothingTask(config1, task_id='Gaussian1').prepare_task() # here we reuse parameters but set the output dataset of the previous # task as input config2 = copy.deepcopy(config) config2['in_ds_name'] = 'volumes/raw_smoothed' config2['out_ds_name'] = 'volumes/raw_smoothed_smoothed' daisy_task2 = GaussianSmoothingTask( config2, task_id='Gaussian2').prepare_task(upstream_tasks=[daisy_task1]) done = daisy.run_blockwise([daisy_task1, daisy_task2]) if done: print("Ran all blocks successfully!") else: print("Did not run all blocks successfully...")