def parallel_skeletonize_subset(
    dbf_shm_location, dbf_shape, dbf_dtype,
    cc_shm_location, cc_shape, cc_dtype,
    vg_shm_location, vg_shape, vg_dtype,
    *args, **kwargs
):
    """Attach to the named shared arrays and run ``skeletonize_subset`` on them.

    Each ``(location, shape, dtype)`` triple is handed to ``shm.ndarray`` with
    ``order='F'`` to obtain a handle plus an array. The three arrays are
    passed to ``skeletonize_subset`` as its first three positional arguments,
    followed by ``*args`` and ``**kwargs``.

    Args:
        dbf_shm_location, dbf_shape, dbf_dtype: describe the first array
            (``all_dbf``).
        cc_shm_location, cc_shape, cc_dtype: describe the second array
            (``cc_labels``).
        vg_shm_location, vg_shape, vg_dtype: describe the optional third array
            (``voxel_graph``). When ``vg_shape`` is None nothing is attached
            and None is passed to ``skeletonize_subset`` instead.
        *args, **kwargs: forwarded unchanged to ``skeletonize_subset``.

    Returns:
        Whatever ``skeletonize_subset`` returns.
    """
    dbf_mmap = cc_mmap = vg_mmap = None
    try:
        dbf_mmap, all_dbf = shm.ndarray(
            dbf_shape, dtype=dbf_dtype, location=dbf_shm_location, order='F'
        )
        cc_mmap, cc_labels = shm.ndarray(
            cc_shape, dtype=cc_dtype, location=cc_shm_location, order='F'
        )
        voxel_graph = None
        if vg_shape is not None:
            vg_mmap, voxel_graph = shm.ndarray(
                vg_shape, dtype=vg_dtype, location=vg_shm_location, order='F'
            )
        return skeletonize_subset(
            all_dbf, cc_labels, voxel_graph, *args, **kwargs
        )
    finally:
        # Close every handle that was opened, even when skeletonization
        # raises. Identity is tested rather than truthiness so a handle is
        # never skipped because of how its type defines bool()/len().
        for handle in (dbf_mmap, cc_mmap, vg_mmap):
            if handle is not None:
                handle.close()
def child_process_download(
    meta, cache, mip, compress_cache, dest_bbox,
    fill_missing, progress, location, use_shared_memory,
    green, cloudpaths
):
    """Download ``cloudpaths`` and paint them into a shared destination buffer.

    The destination buffer is opened at ``location`` with the shape
    ``dest_bbox.size3() + [meta.num_channels]`` and dtype ``meta.dtype``,
    either through ``shm.ndarray`` (``use_shared_memory`` truthy) or through
    ``shm.ndarray_fs`` with ``emulate_shm=False``. Every downloaded chunk is
    written into it with ``shade``.

    Args:
        meta, cache, mip, compress_cache, fill_missing, progress, green:
            forwarded to ``download_chunks_threaded``.
        dest_bbox: bounding box covered by the destination buffer.
        location: name/path of the destination buffer.
        use_shared_memory: selects ``shm.ndarray`` over ``shm.ndarray_fs``.
        cloudpaths: the chunk paths this call is responsible for.
    """
    reset_connection_pools()  # otherwise multi-process hangs

    shape = list(dest_bbox.size3()) + [meta.num_channels]

    if use_shared_memory:
        array_like, dest_img = shm.ndarray(
            shape, dtype=meta.dtype, location=location, lock=fs_lock
        )
    else:
        array_like, dest_img = shm.ndarray_fs(
            shape, dtype=meta.dtype, location=location,
            emulate_shm=False, lock=fs_lock
        )

    def process(src_img, src_bbox):
        shade(dest_img, dest_bbox, src_img, src_bbox)

    try:
        download_chunks_threaded(
            meta, cache, mip, cloudpaths,
            fn=process, fill_missing=fill_missing, progress=progress,
            compress_cache=compress_cache, green=green
        )
    finally:
        # Release the handle even if a chunk download fails.
        array_like.close()
def parallel_skeletonize_subset(
    dbf_shm_location, dbf_shape, dbf_dtype,
    cc_shm_location, cc_shape, cc_dtype,
    *args, **kwargs
):
    """Attach to two named shared arrays and run ``skeletonize_subset`` on them.

    Each ``(location, shape, dtype)`` triple is handed to ``shm.ndarray`` with
    ``order='F'`` to obtain a handle plus an array. The two arrays are passed
    to ``skeletonize_subset`` as its first two positional arguments, followed
    by ``*args`` and ``**kwargs``.

    Args:
        dbf_shm_location, dbf_shape, dbf_dtype: describe the first array
            (``all_dbf``).
        cc_shm_location, cc_shape, cc_dtype: describe the second array
            (``cc_labels``).
        *args, **kwargs: forwarded unchanged to ``skeletonize_subset``.

    Returns:
        Whatever ``skeletonize_subset`` returns.
    """
    dbf_mmap = cc_mmap = None
    try:
        dbf_mmap, all_dbf = shm.ndarray(
            dbf_shape, dtype=dbf_dtype, location=dbf_shm_location, order='F'
        )
        cc_mmap, cc_labels = shm.ndarray(
            cc_shape, dtype=cc_dtype, location=cc_shm_location, order='F'
        )
        return skeletonize_subset(all_dbf, cc_labels, *args, **kwargs)
    finally:
        # Close whichever handles were opened, even when skeletonization
        # (or attaching the second array) raises.
        for handle in (dbf_mmap, cc_mmap):
            if handle is not None:
                handle.close()
def multiprocess_download(
    requested_bbox, mip, cloudpaths, meta, cache, compress_cache,
    fill_missing, progress, parallel, location, retain,
    use_shared_memory, order, green, secrets=None, background_color=0
):
    """Download ``cloudpaths`` using child processes and return the result.

    ``cloudpaths`` is split into consecutive groups of
    ``ceil(len(cloudpaths) / parallel)`` paths (at least one per group). Each
    group is passed to ``child_process_download`` through
    ``parallel_execution``. Afterwards the buffer at ``location`` is reopened
    in this process with the requested ``order``.

    Args:
        requested_bbox: bounding box whose ``size3()`` plus
            ``meta.num_channels`` gives the buffer shape.
        parallel: number of processes handed to ``parallel_execution``.
        location: name/path of the buffer shared with the children.
        retain: when falsy, ``location`` is unlinked before returning
            (``shm.unlink`` for shared memory, ``os.unlink`` otherwise).
        use_shared_memory: selects ``shm.ndarray`` over ``shm.ndarray_fs``.
        Remaining arguments are bound into the ``child_process_download``
        partial.

    Returns:
        ``(handle, renderbuffer)`` as returned by ``shm.ndarray`` /
        ``shm.ndarray_fs``.
    """
    length = int(math.ceil(len(cloudpaths) / float(parallel)) or 1)
    cloudpaths_by_process = [
        cloudpaths[i:i + length]
        for i in range(0, len(cloudpaths), length)
    ]

    cpd = partial(
        child_process_download,
        meta, cache, mip, compress_cache, requested_bbox,
        fill_missing, progress, location, use_shared_memory,
        green, secrets, background_color
    )
    parallel_execution(
        cpd, cloudpaths_by_process, parallel, cleanup_shm=location
    )

    shape = list(requested_bbox.size3()) + [meta.num_channels]

    # Both branches bind the same name. Previously the file-backed branch
    # bound ``handle`` while the return statement read ``mmap_handle``, which
    # raised UnboundLocalError whenever use_shared_memory was false.
    if use_shared_memory:
        handle, renderbuffer = shm.ndarray(
            shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock
        )
    else:
        handle, renderbuffer = shm.ndarray_fs(
            shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock, emulate_shm=False
        )

    if not retain:
        if use_shared_memory:
            shm.unlink(location)
        else:
            os.unlink(location)

    return handle, renderbuffer
def child_upload_process(
    meta, cache,
    img_shape, offset, mip,
    compress, cdn_cache, progress,
    location, location_bbox, location_order,
    delete_black_uploads, background_color,
    green, chunk_ranges, compress_level=None,
):
    """Upload ``chunk_ranges`` from a shared buffer opened read-only.

    The buffer at ``location`` is opened with ``shm.ndarray(readonly=True)``.
    Its shape is ``img_shape`` unless ``location_bbox`` is truthy, in which
    case it is ``location_bbox.size3() + [meta.num_channels]`` and the region
    ``Bbox(offset, offset + img_shape[:3])``, expressed relative to
    ``location_bbox.minpt``, is sliced out before uploading.

    Args:
        img_shape: shape of the image being uploaded.
        offset: position of the image; used as the minimum corner of the
            cutout box.
        location, location_order: name and memory order of the shared buffer.
        location_bbox: optional bounding box describing the whole buffer.
        chunk_ranges: the chunk ranges this call is responsible for.
        Remaining arguments are forwarded to ``threaded_upload_chunks``.
    """
    global fs_lock
    reset_connection_pools()

    shared_shape = img_shape
    if location_bbox:
        shared_shape = list(location_bbox.size3()) + [meta.num_channels]

    array_like, renderbuffer = shm.ndarray(
        shape=shared_shape, dtype=meta.dtype,
        location=location, order=location_order,
        lock=fs_lock, readonly=True
    )

    try:
        if location_bbox:
            cutout_bbox = Bbox(offset, offset + img_shape[:3])
            delta_box = cutout_bbox.clone() - location_bbox.minpt
            renderbuffer = renderbuffer[delta_box.to_slices()]

        threaded_upload_chunks(
            meta, cache,
            renderbuffer, mip, chunk_ranges,
            compress=compress, cdn_cache=cdn_cache, progress=progress,
            delete_black_uploads=delete_black_uploads,
            background_color=background_color,
            green=green, compress_level=compress_level,
        )
    finally:
        # Release the handle even if slicing or uploading fails.
        array_like.close()
def multiprocess_download(
    requested_bbox, mip, cloudpaths,
    meta, cache, lru, compress_cache,
    fill_missing, progress,
    parallel, location,
    retain, use_shared_memory, order,
    green, secrets=None, background_color=0,
):
    """Download ``cloudpaths`` using child processes and return the result.

    Every path in ``cloudpaths`` is first removed from ``lru`` (when
    ``lru.size > 0``). The paths are then handed to ``parallel_execution``,
    which invokes ``child_process_download``. Afterwards the buffer at
    ``location`` is reopened in this process with the requested ``order``.
    When ``meta.encoding(mip) == "raw"``, ``repopulate_lru_from_shm`` is
    called with the buffer.

    Args:
        requested_bbox: bounding box whose ``size3()`` plus
            ``meta.num_channels`` gives the buffer shape.
        lru: cache object exposing ``size`` and ``pop``.
        parallel: number of processes handed to ``parallel_execution``.
        location: name/path of the buffer shared with the children.
        retain: when falsy, ``location`` is unlinked before returning
            (``shm.unlink`` for shared memory, ``os.unlink`` otherwise).
        use_shared_memory: selects ``shm.ndarray`` over ``shm.ndarray_fs``.
        Remaining arguments are bound into the ``child_process_download``
        partial.

    Returns:
        ``(handle, renderbuffer)`` as returned by ``shm.ndarray`` /
        ``shm.ndarray_fs``.
    """
    cpd = partial(
        child_process_download,
        meta, cache, mip, compress_cache,
        requested_bbox,
        fill_missing, progress,
        location, use_shared_memory,
        green, secrets, background_color
    )

    if lru.size > 0:
        for path in cloudpaths:
            lru.pop(path, None)

    parallel_execution(
        cpd, cloudpaths, parallel,
        progress=progress,
        desc="Download",
        cleanup_shm=location,
        block_size=750,
    )

    shape = list(requested_bbox.size3()) + [meta.num_channels]

    # Both branches bind the same name. Previously the file-backed branch
    # bound ``handle`` while the return statement read ``mmap_handle``, which
    # raised UnboundLocalError whenever use_shared_memory was false.
    if use_shared_memory:
        handle, renderbuffer = shm.ndarray(
            shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock
        )
    else:
        handle, renderbuffer = shm.ndarray_fs(
            shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock,
            emulate_shm=False
        )

    if meta.encoding(mip) == "raw":
        repopulate_lru_from_shm(meta, mip, lru, renderbuffer, requested_bbox)

    if not retain:
        if use_shared_memory:
            shm.unlink(location)
        else:
            os.unlink(location)

    return handle, renderbuffer
def test_parallel_shared_memory_write():
    """Exercise ``upload_from_shared_memory`` across several configurations.

    Covers ``parallel`` of 1 and 2, a ``cutout_bbox`` smaller than the shared
    buffer, a negative-origin bbox with ``autocrop`` enabled, and ``order='C'``.
    The shared-memory segment is closed and unlinked in a ``finally`` clause so
    a failing assertion does not leave the named segment behind.
    """
    delete_layer()
    cv, _ = create_layer(size=(256, 256, 128, 1), offset=(0, 0, 0))

    shm_location = 'cloudvolume-test-shm-parallel-write'
    mmapfh, shareddata = shm.ndarray(
        shape=(256, 256, 128), dtype=np.uint8, location=shm_location
    )

    try:
        # Whole volume, single process.
        shareddata[:] = 1
        cv.parallel = 1
        cv.upload_from_shared_memory(
            shm_location, Bbox((0, 0, 0), (256, 256, 128))
        )
        assert np.all(cv[:] == 1)

        # Whole volume, two processes.
        shareddata[:] = 2
        cv.parallel = 2
        cv.upload_from_shared_memory(
            shm_location, Bbox((0, 0, 0), (256, 256, 128))
        )
        assert np.all(cv[:] == 2)

        # Upload only the lower half in z through cutout_bbox.
        shareddata[:, :, :64] = 3
        cv.upload_from_shared_memory(
            shm_location,
            bbox=Bbox((0, 0, 0), (256, 256, 128)),
            cutout_bbox=Bbox((0, 0, 0), (256, 256, 64)),
        )
        assert np.all(cv[:, :, :64] == 3)
        assert np.all(cv[:, :, 64:128] == 2)

        # Buffer positioned at a negative origin with autocrop turned on.
        shareddata[:, :, :69] = 4
        cv.autocrop = True
        cv.upload_from_shared_memory(
            shm_location,
            bbox=Bbox((-5, -5, -5), (251, 251, 123)),
            cutout_bbox=Bbox((-5, -5, -5), (128, 128, 64)),
        )
        assert np.all(cv[:128, :128, :63] == 4)
        assert np.all(cv[128:, 128:, :64] == 3)
        assert np.all(cv[:, :, 64:128] == 2)

        # Same buffer uploaded with order='C'.
        shareddata[:] = 0
        shareddata[:, 0, 0] = 1
        cv.upload_from_shared_memory(
            shm_location, bbox=Bbox((0, 0, 0), (256, 256, 128)), order='C'
        )
        assert np.all(cv[0, 0, :] == 1)
        assert np.all(cv[1, 0, :] == 0)
    finally:
        mmapfh.close()
        shm.unlink(shm_location)
def child_process_download(
    meta, cache, mip, compress_cache, dest_bbox,
    fill_missing, progress,
    location, use_shared_memory, green,
    secrets, background_color, cloudpaths
):
    """Download ``cloudpaths`` and paint them into a shared destination buffer.

    The destination buffer is opened at ``location`` with the shape
    ``dest_bbox.size3() + [meta.num_channels]`` and dtype ``meta.dtype``,
    either through ``shm.ndarray`` (``use_shared_memory`` truthy) or through
    ``shm.ndarray_fs`` with ``emulate_shm=False``. Every downloaded chunk is
    written into it with ``shade``. When ``progress`` is truthy, 1 is put on
    the module-level ``progress_queue`` after each chunk.

    Args:
        dest_bbox: bounding box covered by the destination buffer.
        location: name/path of the destination buffer.
        use_shared_memory: selects ``shm.ndarray`` over ``shm.ndarray_fs``.
        background_color: when non-zero, written into the buffer before the
            download starts; also forwarded to ``download_chunks_threaded``.
        cloudpaths: the chunk paths this call is responsible for.
        Remaining arguments are forwarded to ``download_chunks_threaded``.

    Returns:
        ``len(cloudpaths)``.
    """
    reset_connection_pools()  # otherwise multi-process hangs

    shape = list(dest_bbox.size3()) + [meta.num_channels]

    if use_shared_memory:
        array_like, dest_img = shm.ndarray(
            shape, dtype=meta.dtype,
            location=location, lock=fs_lock
        )
    else:
        array_like, dest_img = shm.ndarray_fs(
            shape, dtype=meta.dtype,
            location=location, emulate_shm=False,
            lock=fs_lock
        )

    def process(src_img, src_bbox):
        shade(dest_img, dest_bbox, src_img, src_bbox)
        if progress:
            # This is not good programming practice, but
            # I could not find a clean way to do this that
            # did not result in warnings about leaked semaphores.
            # progress_queue is created in common.py:initialize_progress_queue
            # as a global for this module.
            progress_queue.put(1)

    try:
        if background_color != 0:
            # NOTE(review): dest_img is sized to dest_bbox.size3() but is
            # indexed here with dest_bbox's own slices - confirm this is the
            # intended region when dest_bbox.minpt is not the origin.
            dest_img[dest_bbox.to_slices()] = background_color

        # progress=False: progress is reported through progress_queue in
        # process() rather than by download_chunks_threaded itself.
        download_chunks_threaded(
            meta, cache, None, mip, cloudpaths,
            fn=process, decode_fn=decode, fill_missing=fill_missing,
            progress=False, compress_cache=compress_cache,
            green=green, secrets=secrets, background_color=background_color
        )
    finally:
        # Release the handle even if the fill or a chunk download fails.
        array_like.close()

    return len(cloudpaths)
def download(requested_bbox, mip, meta, cache, fill_missing, progress, parallel, location, retain, use_shared_memory, use_file, compress, order='F', green=False, secrets=None, renumber=False, background_color=0):
    """Cutout a requested bounding box from storage and return it as a numpy array.

    The requested box is expanded to chunk boundaries and clamped to
    ``meta.bounds(mip)`` to determine which chunks to fetch. With
    ``parallel == 1`` the chunks are downloaded by
    ``download_chunks_threaded`` in this process; otherwise the work is
    delegated to ``multiprocess_download``.

    With ``parallel == 1`` the render buffer is, in order of precedence, a
    shared-memory array (``use_shared_memory``), a file-backed array
    (``use_file``), or an in-memory array pre-filled with
    ``background_color``. In the multi-process case shared memory is used
    unless ``use_file`` is true.

    Returns:
        The ``VolumeCutout`` built from the render buffer, or
        ``(cutout, remap)`` when ``renumber`` is true.

    Raises:
        ValueError: if ``renumber`` is combined with ``parallel != 1``, or if
            both ``use_shared_memory`` and ``use_file`` are set.
    """
    full_bbox = requested_bbox.expand_to_chunk_size(
        meta.chunk_size(mip), offset=meta.voxel_offset(mip))
    full_bbox = Bbox.clamp(full_bbox, meta.bounds(mip))
    cloudpaths = list(
        chunknames(full_bbox, meta.bounds(mip), meta.key(mip), meta.chunk_size(mip), protocol=meta.path.protocol))
    # The buffer covers the requested box, not the chunk-aligned one.
    shape = list(requested_bbox.size3()) + [meta.num_channels]
    compress_cache = should_compress(meta.encoding(mip), compress, cache, iscache=True)
    # Stays None when the buffer is a plain in-memory array.
    handle = None
    if renumber and (parallel != 1):
        raise ValueError("renumber is not supported for parallel operation.")
    if use_shared_memory and use_file:
        raise ValueError(
            "use_shared_memory and use_file are mutually exclusive arguments.")
    # Renumbered output starts as uint16; process_renumber widens the buffer
    # once the label counter exceeds the dtype's maximum.
    dtype = np.uint16 if renumber else meta.dtype
    if parallel == 1:
        if use_shared_memory:  # write to shared memory
            handle, renderbuffer = shm.ndarray(shape, dtype=dtype, order=order, location=location, lock=fs_lock)
            if not retain:
                shm.unlink(location)
        elif use_file:  # write to ordinary file
            handle, renderbuffer = shm.ndarray_fs(shape, dtype=dtype, order=order, location=location, lock=fs_lock, emulate_shm=False)
            if not retain:
                os.unlink(location)
        else:
            renderbuffer = np.full(shape=shape, fill_value=background_color, dtype=dtype, order=order)

        def process(img3d, bbox):
            shade(renderbuffer, requested_bbox, img3d, bbox)

        # State shared by process_renumber calls: the label mapping (the
        # background color maps to itself), the lock guarding it, and the
        # next new label to hand out.
        remap = {background_color: background_color}
        lock = threading.Lock()
        N = 1

        def process_renumber(img3d, bbox):
            nonlocal N
            nonlocal lock
            nonlocal remap
            nonlocal renderbuffer
            img_labels = fastremap.unique(img3d)
            # NOTE(review): the remap and shade calls are placed inside the
            # lock here; confirm this matches the intended lock scope.
            with lock:
                for lbl in img_labels:
                    if lbl not in remap:
                        remap[lbl] = N
                        N += 1
                # Rebinds the enclosing renderbuffer when the counter no
                # longer fits the current dtype.
                if N > np.iinfo(renderbuffer.dtype).max:
                    renderbuffer = fastremap.refit(renderbuffer, value=N, increase_only=True)
                fastremap.remap(img3d, remap, in_place=True)
                shade(renderbuffer, requested_bbox, img3d, bbox)

        fn = process
        # Renumbering is applied only to the in-memory buffer; with a file or
        # shared-memory buffer the plain process callback is used.
        if renumber and not (use_file or use_shared_memory):
            fn = process_renumber
        download_chunks_threaded(meta, cache, mip, cloudpaths, fn=fn, fill_missing=fill_missing, progress=progress, compress_cache=compress_cache, green=green, secrets=secrets, background_color=background_color)
    else:
        handle, renderbuffer = multiprocess_download(
            requested_bbox, mip, cloudpaths, meta, cache, compress_cache, fill_missing, progress, parallel, location, retain, use_shared_memory=(use_file == False), order=order, green=green, secrets=secrets, background_color=background_color)
    out = VolumeCutout.from_volume(meta, mip, renderbuffer, requested_bbox, handle=handle)
    if renumber:
        return (out, remap)
    return out
def upload_aligned(
    meta, cache,
    img, offset, mip,
    compress=None,
    compress_level=None,
    cdn_cache=None,
    progress=False,
    parallel=1,
    location=None,
    location_bbox=None,
    location_order='F',
    use_shared_memory=False,
    use_file=False,
    delete_black_uploads=False,
    background_color=0,
    green=False,
):
    """Upload ``img`` as chunks, in this process or across child processes.

    With ``parallel == 1`` the chunks from ``generate_chunks`` are uploaded
    directly via ``threaded_upload_chunks``. Otherwise the chunk ranges are
    split into groups of ``len(chunk_ranges) // parallel`` (at least one) and
    each group is handed to ``child_upload_process`` through
    ``parallel_execution``; the children read the image from the shared
    buffer at ``location``.

    Args:
        img: image to upload.
        offset: position of ``img``, forwarded to ``generate_chunks`` and to
            the children.
        parallel: 1 uploads in-process; any other value uses
            ``parallel_execution`` with that many processes.
        location, location_bbox, location_order: describe the shared buffer
            the children attach to.
        use_shared_memory: when truthy the caller has already populated the
            buffer at ``location`` and is responsible for cleaning it up.
            When falsy this function creates the buffer, copies ``img`` into
            it, and closes and unlinks it afterwards.
        use_file: not used in this function body.
        Remaining arguments are forwarded to the upload routines.
    """
    global fs_lock

    chunk_ranges = list(generate_chunks(meta, img, offset, mip))

    if parallel == 1:
        threaded_upload_chunks(
            meta, cache,
            img, mip, chunk_ranges,
            progress=progress,
            compress=compress, cdn_cache=cdn_cache,
            delete_black_uploads=delete_black_uploads,
            background_color=background_color,
            green=green, compress_level=compress_level,
        )
        return

    length = (len(chunk_ranges) // parallel) or 1
    chunk_ranges_by_process = [
        chunk_ranges[i:i + length]
        for i in range(0, len(chunk_ranges), length)
    ]

    # Stays None in manual mode, where the caller owns the buffer.
    array_like = None
    try:
        # use_shared_memory means use a predetermined
        # shared memory location, not no shared memory
        # at all.
        if not use_shared_memory:
            array_like, renderbuffer = shm.ndarray(
                shape=img.shape, dtype=img.dtype,
                location=location, order=location_order,
                lock=fs_lock
            )
            renderbuffer[:] = img

        cup = partial(
            child_upload_process,
            meta, cache,
            img.shape, offset, mip,
            compress, cdn_cache, progress,
            location, location_bbox, location_order,
            delete_black_uploads, background_color,
            green, compress_level=compress_level
        )

        parallel_execution(
            cup, chunk_ranges_by_process, parallel, cleanup_shm=location
        )
    finally:
        # If manual mode is enabled, it's the
        # responsibility of the user to clean up. Otherwise the buffer
        # created above is released even when the copy or upload fails.
        if array_like is not None:
            array_like.close()
            shm.unlink(location)
def download(requested_bbox, mip, meta, cache, fill_missing, progress,
             parallel, location, retain, use_shared_memory, use_file,
             compress, order='F', green=False):
    """Fetch the chunks covering ``requested_bbox`` and return them as a cutout.

    The chunk list is derived from the requested box expanded to chunk
    boundaries and clamped to the volume bounds. A single-process download
    renders into shared memory, an ordinary file, or a fresh zeroed array;
    any other ``parallel`` value hands the work to ``multiprocess_download``.

    Raises:
        ValueError: if both ``use_shared_memory`` and ``use_file`` are set.
    """
    aligned_bbox = Bbox.clamp(
        requested_bbox.expand_to_chunk_size(
            meta.chunk_size(mip), offset=meta.voxel_offset(mip)
        ),
        meta.bounds(mip),
    )
    chunk_paths = chunknames(
        aligned_bbox, meta.bounds(mip), meta.key(mip),
        meta.chunk_size(mip), protocol=meta.path.protocol,
    )
    cloudpaths = list(chunk_paths)

    buffer_shape = list(requested_bbox.size3()) + [meta.num_channels]
    compress_cache = should_compress(
        meta.encoding(mip), compress, cache, iscache=True
    )

    if use_shared_memory and use_file:
        raise ValueError(
            "use_shared_memory and use_file are mutually exclusive arguments.")

    if parallel != 1:
        # Multi-process path: the children fill the buffer.
        buffer_handle, canvas = multiprocess_download(
            requested_bbox, mip, cloudpaths,
            meta, cache, compress_cache,
            fill_missing, progress,
            parallel, location, retain,
            use_shared_memory=(use_file == False),
            order=order,
            green=green,
        )
        return VolumeCutout.from_volume(
            meta, mip, canvas, requested_bbox, handle=buffer_handle
        )

    # Single-process path: pick the backing store for the render buffer.
    buffer_handle = None
    if use_shared_memory:
        # render into shared memory
        buffer_handle, canvas = shm.ndarray(
            buffer_shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock
        )
        if not retain:
            shm.unlink(location)
    elif use_file:
        # render into an ordinary file
        buffer_handle, canvas = shm.ndarray_fs(
            buffer_shape, dtype=meta.dtype, order=order,
            location=location, lock=fs_lock, emulate_shm=False
        )
        if not retain:
            os.unlink(location)
    else:
        canvas = np.zeros(shape=buffer_shape, dtype=meta.dtype, order=order)

    def paint(img3d, bbox):
        shade(canvas, requested_bbox, img3d, bbox)

    download_chunks_threaded(
        meta, cache, mip, cloudpaths,
        fn=paint, fill_missing=fill_missing,
        progress=progress, compress_cache=compress_cache,
        green=green
    )

    return VolumeCutout.from_volume(
        meta, mip, canvas, requested_bbox, handle=buffer_handle
    )
def skeletonize(
    all_labels, teasar_params=DEFAULT_TEASAR_PARAMS, anisotropy=(1, 1, 1),
    object_ids=None, dust_threshold=1000, cc_safety_factor=1,
    progress=False, fix_branching=True, in_place=False,
    fix_borders=True, parallel=1, parallel_chunk_size=100,
    extra_targets_before=None, extra_targets_after=None,
    fill_holes=False, fix_avocados=False
):
    """
    Skeletonize all non-zero labels in a given 2D or 3D image.

    Required:
      all_labels: a 2D or 3D numpy array of integer type (signed or unsigned)

    Optional:
      anisotropy: the physical dimensions of each axis (e.g. 4nm x 4nm x 40nm)
      object_ids: If not None, zero out all labels other than those specified
        here.
      teasar_params: {
        scale: during the "rolling ball" invalidation phase, multiply
            the DBF value by this.
        const: during the "rolling ball" invalidation phase, this
            is the minimum radius in chosen physical units (i.e. nm).
        soma_detection_threshold: if object has a DBF value larger than this,
            root will be placed at largest DBF value and special one time
            invalidation will be run over that root location
            (see soma_invalidation_scale), expressed in chosen physical
            units (i.e. nm)
        pdrf_scale: scale factor in front of dbf, used to weight dbf over
            euclidean distance (higher to pay more attention to dbf)
            (default 5000)
        pdrf_exponent: exponent in dbf formula on distance from edge, faster
            if factor of 2 (default 16)
        soma_invalidation_scale: the 'scale' factor used in the one time soma
            root invalidation (default .5)
        soma_invalidation_const: the 'const' factor used in the one time soma
            root invalidation (default 0)
            (units in chosen physical units (i.e. nm))
        max_paths: max paths to trace on a single object. Moves onto the next
            object after this point.
      }
      dust_threshold: don't bother skeletonizing connected components smaller
        than this many voxels.
      fill_holes: preemptively run a void filling algorithm on all connected
        components and delete labels that get filled in. This can improve the
        quality of the reconstruction if holes in the shapes are artifacts
        introduced by the segmentation pipeline. This option incurs moderate
        overhead.

        WARNING: THIS WILL REMOVE INPUT LABELS THAT ARE DEEMED TO BE HOLES.

      cc_safety_factor: Value between 0 and 1 that scales the size of the
        disjoint set maps in connected_components. 1 is guaranteed to work,
        but is probably excessive and corresponds to every pixel being a
        different label. Use smaller values to save some memory.
      extra_targets_before: List of x,y,z voxel coordinates that will all
        be traced to from the root regardless of whether those points have
        been invalidated. These targets will be applied BEFORE the regular
        target selection algorithm is run. None (the default) means no
        extra targets.

        e.g. [ (x,y,z), (x,y,z) ]

      extra_targets_after: Same as extra_targets_before but the additional
        targets will be applied AFTER the usual algorithm runs.
      progress: if true, display a progress bar
      fix_branching: When enabled, zero out the edge weights of previously
        traced paths. This causes branch points to occur closer to the
        actual path divergence. However, there is a performance penalty
        associated with this as dijkstra's algorithm is computed once per
        path rather than once per skeleton.
      in_place: if true, allow input labels to be modified to reduce
        memory usage and possibly improve performance.
      fix_borders: ensure that segments touching the border place a
        skeleton endpoint in a predictable place to make merging
        adjacent chunks easier.
      fix_avocados: If nuclei are segmented separately from somata
        then we can try to detect and fix this issue.
      parallel: number of subprocesses to use.
        <= 0: Use multiprocessing.cpu_count()
           1: Only use the main process.
        >= 2: Use this number of subprocesses.
      parallel_chunk_size: default number of skeletons to
        submit to each parallel process before returning results,
        updating the progress bar, and submitting a new task set.
        Setting this number too low results in excess IPC overhead,
        and setting it too high can result in task starvation towards
        the end of a job and infrequent progress bar updates. If the
        chunk size is set higher than num tasks // parallel, that number
        is used instead.

    Returns: { $segid: cloudvolume.PrecomputedSkeleton, ... }
    """
    # None stands in for "no extra targets" so that no list object is shared
    # between calls through the default arguments.
    if extra_targets_before is None:
        extra_targets_before = []
    if extra_targets_after is None:
        extra_targets_after = []

    anisotropy = np.array(anisotropy, dtype=np.float32)

    all_labels = format_labels(all_labels, in_place=in_place)
    all_labels = apply_object_mask(all_labels, object_ids)

    # An image no larger than the dust threshold cannot contain a component
    # that exceeds it.
    if all_labels.size <= dust_threshold:
        return {}

    minlabel, maxlabel = fastremap.minmax(all_labels)

    if minlabel == 0 and maxlabel == 0:
        return {}

    cc_labels, remapping = compute_cc_labels(all_labels, cc_safety_factor)
    del all_labels

    if fill_holes:
        cc_labels = fill_all_holes(cc_labels, progress)

    extra_targets_before = points_to_labels(extra_targets_before, cc_labels)
    extra_targets_after = points_to_labels(extra_targets_after, cc_labels)

    def edtfn(labels):
        return edt.edt(
            labels,
            anisotropy=anisotropy,
            black_border=(minlabel == maxlabel),
            order='F',
            parallel=parallel,
        )

    all_dbf = edtfn(cc_labels)

    if fix_avocados:
        cc_labels, all_dbf, remapping = engage_avocado_protection(
            cc_labels, all_dbf, remapping,
            soma_detection_threshold=teasar_params.get(
                'soma_detection_threshold', 0),
            edtfn=edtfn,
            progress=progress,
        )

    # Keep only non-zero components with more voxels than the dust threshold.
    cc_segids, pxct = fastremap.unique(cc_labels, return_counts=True)
    cc_segids = [
        sid for sid, ct in zip(cc_segids, pxct)
        if ct > dust_threshold and sid != 0
    ]

    all_slices = find_objects(cc_labels)

    border_targets = defaultdict(list)
    if fix_borders:
        border_targets = compute_border_targets(cc_labels, anisotropy)

    print_quotes(parallel)  # easter egg

    if parallel <= 0:
        parallel = mp.cpu_count()

    if parallel == 1:
        return skeletonize_subset(
            all_dbf, cc_labels, remapping,
            teasar_params, anisotropy, all_slices,
            border_targets, extra_targets_before, extra_targets_after,
            progress, fix_borders, fix_branching,
            cc_segids
        )

    # The following section can't be moved into
    # skeletonize_parallel because then all_dbf
    # and cc_labels can't be deleted to save memory.
    suffix = uuid.uuid1().hex

    dbf_shm_location = 'kimimaro-shm-dbf-' + suffix
    cc_shm_location = 'kimimaro-shm-cc-labels-' + suffix

    dbf_mmap = cc_mmap = None
    try:
        dbf_mmap, all_dbf_shm = shm.ndarray(
            all_dbf.shape, all_dbf.dtype, dbf_shm_location, order='F'
        )
        cc_mmap, cc_labels_shm = shm.ndarray(
            cc_labels.shape, cc_labels.dtype, cc_shm_location, order='F'
        )

        all_dbf_shm[:] = all_dbf
        cc_labels_shm[:] = cc_labels

        # The shared copies are the only ones needed from here on.
        del all_dbf
        del cc_labels

        return skeletonize_parallel(
            all_dbf_shm, dbf_shm_location,
            cc_labels_shm, cc_shm_location, remapping,
            teasar_params, anisotropy, all_slices,
            border_targets, extra_targets_before, extra_targets_after,
            progress, fix_borders, fix_branching,
            cc_segids, parallel, parallel_chunk_size
        )
    finally:
        # Close whichever handles were opened, even if the copy or the
        # parallel run raises.
        for handle in (dbf_mmap, cc_mmap):
            if handle is not None:
                handle.close()
def child_upload_process(
    meta, cache, img_shape, offset, mip, compress, cdn_cache, progress,
    location, location_bbox, location_order, delete_black_uploads,
    background_color, green, chunk_ranges, compress_level=None, secrets=None
):
    """Upload one batch of chunk ranges from a read-only shared buffer.

    The buffer at ``location`` is attached read-only. When ``location_bbox``
    is truthy the buffer spans that whole box, and the sub-region belonging
    to this image (``offset`` through ``offset + img_shape[:3]``, relative to
    the box's minimum point) is sliced out before uploading. The handle is
    closed on every exit path.

    Returns:
        Whatever ``threaded_upload_chunks`` returns.
    """
    global fs_lock
    reset_connection_pools()

    buffer_shape = (
        list(location_bbox.size3()) + [meta.num_channels]
        if location_bbox
        else img_shape
    )

    shm_handle, image = shm.ndarray(
        shape=buffer_shape,
        dtype=meta.dtype,
        location=location,
        order=location_order,
        lock=fs_lock,
        readonly=True,
    )

    def report_progress():
        if not progress:
            return
        # This is not good programming practice, but
        # I could not find a clean way to do this that
        # did not result in warnings about leaked semaphores.
        # progress_queue is created in common.py:initialize_progress_queue
        # as a global for this module.
        progress_queue.put(1)

    try:
        if location_bbox:
            image_box = Bbox(offset, offset + img_shape[:3])
            relative_box = image_box.clone() - location_bbox.minpt
            image = image[relative_box.to_slices()]

        result = threaded_upload_chunks(
            meta, cache, None,
            image, mip, chunk_ranges,
            compress=compress,
            cdn_cache=cdn_cache,
            progress=report_progress,
            delete_black_uploads=delete_black_uploads,
            background_color=background_color,
            green=green,
            compress_level=compress_level,
            secrets=secrets,
        )
        return result
    finally:
        shm_handle.close()