def main():
    """
    Prepare the destination annotation instance and launch the parallel copy.

    Steps, in order:
      1. Create the annotation instance ``dst_syn`` on ``dst_node`` if it is
         not already listed among the repo's annotation instances.
      2. Make sure ``dst_syn`` is sync'd to ``dst_seg``; refuse to continue if
         it is already sync'd to some other segmentation.
      3. Split the source segmentation's bounding box into X-long chunks and
         hand them to ``copy_syn_blocks`` via ``compute_parallel``.

    Raises:
        RuntimeError: if ``dst_syn`` is already sync'd to an instance other
            than ``dst_seg``.
    """
    # Step 1: destination instance.
    existing_annotations = fetch_repo_instances(*dst_node, 'annotation')
    if dst_syn not in existing_annotations:
        logger.info(f"Creating instance '{dst_syn}'")
        create_instance(*dst_node, dst_syn, 'annotation')

    # Step 2: sync configuration. Only the first listed sync is inspected.
    current_syncs = fetch_instance_info(*dst_node, dst_syn)["Base"]["Syncs"]
    if len(current_syncs) > 0 and current_syncs[0] != dst_seg:
        other_seg = current_syncs[0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_seg}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}"
        )
    if len(current_syncs) == 0:
        logger.info(f"Adding a sync to '{dst_syn}' from '{dst_seg}'")
        post_sync(*dst_node, dst_syn, [dst_seg])

    # Step 3: chunk the source extents.
    # The grid shape is (Z, Y, X) = (256, 256, 6400): chunks that are long in X
    # are the optimal access pattern for dvid read/write.
    src_extents_zyx = fetch_volume_box(*src_node, src_seg).tolist()
    chunk_boxes = boxes_from_grid(src_extents_zyx, (256, 256, 6400), clipped=True)

    # Copy the chunks in a process pool; result order is irrelevant.
    compute_parallel(copy_syn_blocks, chunk_boxes, processes=PROCESSES, ordered=False)
def export_downsampled_grayscale(instance_info, scale, parent_output_dir):
    """
    Export one scale of a grayscale instance as a directory of per-slice TIFFs.

    Args:
        instance_info:
            Sequence ``(server, uuid, instance)``. For ``scale > 0`` the third
            element is replaced by ``'grayscale_{scale}'``.
        scale:
            Scale to export. For ``scale > 0`` the instance named
            ``'grayscale_{scale}'`` is read; otherwise the instance named in
            ``instance_info`` is read.
        parent_output_dir:
            Slices are written to ``{parent_output_dir}/grayscale-scale-{scale}``
            (created if necessary) as ``{z:05d}.tiff``.
    """
    if scale > 0:
        instance_name = f'grayscale_{scale}'
        instance_info = (instance_info[0], instance_info[1], instance_name)
    else:
        # Without this branch, instance_name was unbound for scale 0 and the
        # get_gray3D() call below failed with UnboundLocalError.
        instance_name = instance_info[2]

    output_dir = f'{parent_output_dir}/grayscale-scale-{scale}'
    os.makedirs(output_dir, exist_ok=True)

    # The stop corner of the volume box is used as the volume shape,
    # i.e. reads below always start from coordinate 0.
    scaled_shape_zyx = fetch_volume_box(*instance_info)[1]
    ns = DVIDNodeService(*instance_info[:2])

    # Read the volume in slabs of 64 z-slices.
    # NOTE(review): only whole slabs are covered -- if the Z extent is not a
    # multiple of 64, the trailing slices are not exported. Left as-is;
    # confirm whether that is intended.
    z_slab_bounds = list(range(0, scaled_shape_zyx[0] // 64 * 64 + 1, 64))
    y_stop, x_stop = scaled_shape_zyx[1:3]

    for z_start, z_stop in tqdm(list(zip(z_slab_bounds[:-1], z_slab_bounds[1:]))):
        slab_shape = (z_stop - z_start, y_stop, x_stop)
        slab_vol = ns.get_gray3D(instance_name, slab_shape, (z_start, 0, 0), False, False)
        slab_vol = vigra.taggedView(slab_vol, 'zyx')

        # One file per slice, named by its absolute z index.
        for z, z_slice in enumerate(tqdm(slab_vol, leave=False), start=z_start):
            vigra.impex.writeImage(z_slice, f'{output_dir}/{z:05d}.tiff', 'UINT8')
def export_sparsevol(server, uuid, instance, neurons_df, scale=5, format='tiff', output_dir='.'):
    """
    Write one binary mask image stack per neuron group.

    For every distinct value in ``neurons_df['group']`` (in order of first
    appearance), the sparsevol masks of all bodies in that group are OR-ed
    into a single volume, which is then written slice-by-slice to
    ``{output_dir}/{group}.stack/{z}.{format}`` with zero-padded z indices.

    Args:
        server, uuid, instance:
            Location of the segmentation. ``uuid`` is passed through
            ``resolve_ref()`` first.
        neurons_df:
            DataFrame with (at least) columns ``'group'`` and ``'body'``.
        scale:
            Scale at which masks are fetched. The mask shape is the
            segmentation's volume box, rounded out to multiples of 64,
            anchored at (0,0,0), and divided by ``2**scale``.
        format:
            File extension used for the slice images.
        output_dir:
            Parent directory for the ``*.stack`` directories.
    """
    import os
    import vigra
    import numpy as np
    from neuclease.util import round_box, tqdm_proxy
    from neuclease.dvid import fetch_sparsevol, resolve_ref, fetch_volume_box, box_to_slicing

    uuid = resolve_ref(server, uuid)
    seg = (server, uuid, instance)

    # Mask geometry: the full segmentation extents at the requested scale,
    # with the start corner forced to the origin.
    full_box = round_box(fetch_volume_box(*seg), 64, 'out')
    full_box[0] = (0,0,0)
    scaled_box = full_box // 2**scale
    mask_shape = scaled_box[1]

    # Zero-pad width for slice file names, derived from the Z extent.
    digits = int(np.ceil(np.log10(scaled_box[1, 0])))

    num_groups = neurons_df['group'].nunique()
    group_prog = tqdm_proxy(neurons_df.groupby('group', sort=False), total=num_groups)

    for group, group_df in group_prog:
        group_prog.write(f'Group "{group}": Assembling mask')
        group_mask = vigra.taggedView(np.zeros(mask_shape, dtype=bool), 'zyx')

        # Accumulate every body of this group into the shared mask.
        for body in tqdm_proxy(group_df['body'], leave=False):
            body_mask, mask_box = fetch_sparsevol(*seg, body, scale=scale, format='mask')
            group_mask[box_to_slicing(*mask_box)] |= body_mask

        group_prog.write(f'Group "{group}": Writing slices')
        stack_dir = f'{output_dir}/{group}.stack'
        os.makedirs(stack_dir, exist_ok=True)

        for z in tqdm_proxy(range(group_mask.shape[0]), leave=False):
            slice_path = f'{stack_dir}/{z:0{digits}d}.{format}'
            vigra.impex.writeImage(group_mask[z].astype(np.uint8), slice_path)
def copy_synapses(src_loc, dst_loc, processes):
    """
    Copy synapse annotations from one location to another, in parallel.

    See caveats in the module docstring above.

    Args:
        src_loc:
            Field values for a ``Location`` describing the source.
        dst_loc:
            Field values for a ``Location`` describing the destination.
        processes:
            Passed to ``compute_parallel()`` as the process count.

    Raises:
        RuntimeError: if the destination annotation instance is already
            sync'd to an instance other than ``dst_loc.seg_instance``.

    Note:
        The slicing below relies on the first three ``Location`` fields being
        (server, uuid, syn_instance) -- presumably followed by seg_instance;
        confirm against the ``Location`` definition.
    """
    src_loc = Location(*src_loc)
    dst_loc = Location(*dst_loc)

    # Create the destination instance if necessary.
    dst_instances = fetch_repo_instances(*dst_loc[:2], 'annotation')
    if dst_loc.syn_instance not in dst_instances:
        logger.info(f"Creating instance '{dst_loc.syn_instance}'")
        # Only (server, uuid, syn_instance) identify the new instance.
        # Splatting the whole Location would also pass seg_instance and
        # shift 'annotation' out of the type-name position.
        create_instance(*dst_loc[:3], 'annotation')

    # Check to see if the sync already exists; add it if necessary.
    # Only the first listed sync is inspected.
    syncs = fetch_instance_info(*dst_loc[:3])["Base"]["Syncs"]
    if len(syncs) == 0:
        logger.info(
            f"Adding a sync to '{dst_loc.syn_instance}' from '{dst_loc.seg_instance}'"
        )
        post_sync(*dst_loc[:3], [dst_loc.seg_instance])
    elif syncs[0] != dst_loc.seg_instance:
        other_seg = syncs[0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_loc.seg_instance}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}"
        )

    # Fetch segmentation extents
    bounding_box_zyx = fetch_volume_box(*src_loc[:2], src_loc.seg_instance).tolist()

    # Break into block-aligned chunks (boxes) that are long in the X direction
    # (optimal access pattern for dvid read/write)
    boxes = boxes_from_grid(bounding_box_zyx, (256, 256, 6400), clipped=True)

    # Use a process pool to copy the chunks in parallel.
    fn = partial(copy_syn_blocks, src_loc, dst_loc)
    compute_parallel(fn, boxes, processes=processes, ordered=False)
def __init__(self, volume_config, resource_manager_client=None):
    """
    Configure the service from ``volume_config``.

    The config is validated against ``DvidGenericVolumeSchema`` (with defaults
    injected) and is then modified in place: the ``geometry`` entries
    ``block-width``, ``bounding-box`` and ``message-block-shape`` are
    overwritten with the values that were actually resolved here.
    ``volume_config["dvid"]["uuid"]`` is NOT overwritten; only ``self._uuid``
    holds the resolved uuid.

    Args:
        volume_config:
            Mapping with (at least) ``"dvid"`` and ``"geometry"`` sections.
            Exactly one of ``segmentation-name`` / ``grayscale-name`` must be
            present in the ``"dvid"`` section.
        resource_manager_client:
            Optional. If None, a dummy ``ResourceManagerClient("", 0)`` is used.

    Raises:
        AssertionError:
            On config errors (misplaced ``apply-labelmap``, both/neither
            instance names, block-width mismatch, non-cubic blocks, or a
            bounding-box with -1 entries when the extents can't be fetched).
        RuntimeError:
            If the instance does not exist and ``create-if-necessary`` is false.
        HTTPError:
            Re-raised if fetching the instance info fails for any other reason.
    """
    validate(volume_config, DvidGenericVolumeSchema, inject_defaults=True)

    dvid_config = volume_config["dvid"]
    geometry_config = volume_config["geometry"]

    assert 'apply-labelmap' not in dvid_config.keys(), \
        ("The apply-labelmap section should be in the 'adapters' section, (parallel to 'dvid' and 'geometry'), "
         "not nested within the 'dvid' section!")

    ##
    ## server, uuid
    ##
    ## Note:
    ##   self._uuid will be resolved, but volume_config["dvid"]["uuid"]
    ##   will not be overwritten. It will remain unresolved.
    ##
    self._server = dvid_config["server"]
    self._uuid = resolve_ref(dvid_config["server"], dvid_config["uuid"])
    self._throttle = dvid_config["accept-throttling"]

    ##
    ## instance, dtype, etc.
    ##
    config_block_width = geometry_config["block-width"]

    assert ('segmentation-name' in dvid_config) ^ ('grayscale-name' in dvid_config), \
        "Config error: Specify either segmentation-name or grayscale-name (not both)"

    if "segmentation-name" in dvid_config:
        self._instance_name = dvid_config["segmentation-name"]
        self._dtype = np.uint64
    elif "grayscale-name" in dvid_config:
        self._instance_name = dvid_config["grayscale-name"]
        self._dtype = np.uint8

    self._dtype_nbytes = np.dtype(self._dtype).itemsize

    try:
        instance_info = fetch_instance_info(self._server, self._uuid, self._instance_name)
    except HTTPError as ex:
        # Only status 400 is handled here; anything else is a genuine failure.
        if ex.response.status_code != 400:
            raise

        if not dvid_config["create-if-necessary"]:
            existing_instances = fetch_repo_instances(self._server, self._uuid)
            if self._instance_name not in existing_instances:
                raise RuntimeError(
                    f"Instance '{self._instance_name}' does not exist in {self._server} / {self._uuid}. "
                    "Add 'create-if-necessary: true' to your config if you want it to be created."
                )
            # The instance is listed in the repo, so the 400 was caused by
            # something else: propagate the original error.
            raise

        # Instance doesn't exist yet -- we are going to create it.
        if "segmentation-name" in dvid_config:
            self._instance_type = 'labelmap'  # get_voxels doesn't really care if it's labelarray or labelmap...
            self._is_labels = True
        else:
            self._instance_type = 'uint8blk'
            self._is_labels = False

        block_width = config_block_width
    else:
        self._instance_type = instance_info["Base"]["TypeName"]
        self._is_labels = self._instance_type in ('labelblk', 'labelarray', 'labelmap')
        if (self._instance_type == "googlevoxels"
                and instance_info["Extended"]["Scales"][0]["channelType"] == "UINT64"):
            self._is_labels = True

        bs_x, bs_y, bs_z = instance_info["Extended"]["BlockSize"]
        assert (bs_x == bs_y == bs_z), "Expected blocks to be cubes."
        block_width = bs_x

    def _segmentation_setting(key):
        # Use the config value if present; otherwise fall back to the default
        # declared in DvidSegmentationServiceSchema. The schema lookup stays
        # lazy, so it is only touched when the key is actually missing.
        if key in dvid_config:
            return dvid_config[key]
        return DvidSegmentationServiceSchema["properties"][key]["default"]

    self.disable_indexing = _segmentation_setting("disable-indexing")
    self.enable_downres = _segmentation_setting("enable-downres")
    self.gzip_level = _segmentation_setting("gzip-level")

    # Whether or not to read the supervoxels from the labelmap instance instead of agglomerated labels.
    self.supervoxels = ("supervoxels" in dvid_config) and (dvid_config["supervoxels"])

    ##
    ## default block width
    ##
    assert config_block_width in (-1, block_width), \
        f"DVID volume block-width ({config_block_width}) from config does not match server metadata ({block_width})"
    if block_width == -1:
        # No block-width specified; choose default
        block_width = 64

    ##
    ## bounding-box
    ##
    # The [:, ::-1] reverses each corner's coordinate order to get ZYX.
    bounding_box_zyx = np.array(geometry_config["bounding-box"])[:, ::-1]
    try:
        # NOTE(review): this reads self.uuid rather than self._uuid --
        # presumably a property defined elsewhere on the class; confirm.
        stored_extents = fetch_volume_box(self._server, self.uuid, self._instance_name)
    except HTTPError:
        assert -1 not in bounding_box_zyx.flat[:], \
            f"Instance '{self._instance_name}' does not yet exist on the server, "\
            "so your volume_config must specify explicit values for bounding-box"
    else:
        if stored_extents is not None and stored_extents.any():
            replace_default_entries(bounding_box_zyx, stored_extents)

    ##
    ## message-block-shape
    ##
    preferred_message_shape_zyx = np.array(geometry_config["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx,
                            [block_width, block_width, 100 * block_width])

    ##
    ## available-scales
    ##
    available_scales = list(geometry_config["available-scales"])

    ##
    ## resource_manager_client
    ##
    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    ##
    ## Special setting to override resource manager for sparse coords
    ##
    try:
        use_resource_manager_for_sparse_coords = dvid_config["use-resource-manager-for-sparse-coords"]
    except KeyError:
        # Grayscale doesn't have this setting
        use_resource_manager_for_sparse_coords = False

    ##
    ## Store members
    ##
    self._resource_manager_client = resource_manager_client
    self._block_width = block_width
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._available_scales = available_scales
    self._use_resource_manager_for_sparse_coords = use_resource_manager_for_sparse_coords
    self.write_empty_blocks = dvid_config["write-empty-blocks"]

    ##
    ## Overwrite config entries that we might have modified
    ##
    geometry_config["block-width"] = self._block_width
    geometry_config["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_config["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()

    # TODO: Check the server for available scales and overwrite in the config?
    #volume_config["geometry"]["available-scales"] = [0]

    if dvid_config["create-if-necessary"]:
        self._create_instance(volume_config)
def autogen_points(input_seg, count, roi, body, tbars, use_skeleton, random_seed=None, minimum_distance=0):
    """
    Generate a list of points within the input segmentation, based on the given criteria.
    See the main help text below for details.

    Exactly one sampling strategy is used, chosen in this order:
    tbars of a body, skeleton nodes of a body, sparsevol of a body,
    an ROI, or (if none of those are given) the whole non-zero segmentation.

    Args:
        input_seg:
            Sequence ``(server, uuid, instance)``.
        count:
            Number of points to return. Required unless ``tbars`` is set.
        roi:
            Optional ROI name used to filter (tbars/skeleton) or to sample from.
        body:
            Optional body ID. Required for ``tbars`` and ``use_skeleton``.
        tbars:
            If truthy, return the body's 'PreSyn' synapse points.
        use_skeleton:
            If truthy, return points taken from the body's skeleton.
        random_seed:
            Seed passed to ``default_rng()``.
        minimum_distance:
            If non-zero, close pairs are pruned via ``prune_close_pairs()``.

    Returns:
        DataFrame of points. All branches return columns ``z, y, x`` except
        the skeleton branch without an ROI filter, which returns whatever
        columns ``swc_to_dataframe()`` produced.

    Exits (via ``sys.exit``) on inconsistent argument combinations.
    """
    if tbars and not body:
        sys.exit(
            "If you want to auto-generate tbar points, please specify a body.")

    if not tbars and not count:
        sys.exit(
            "You must supply a --count unless you are generating all tbars of a body."
        )

    if use_skeleton:
        if not body:
            sys.exit(
                "You must supply a body ID if you want to use a skeleton.")
        if tbars:
            sys.exit(
                "You can't select both tbar points and skeleton points. Pick one or the other."
            )
        if not count and minimum_distance > 0:
            sys.exit(
                "You must supply a --count if you want skeleton point samples to respect the minimum distance."
            )
        if not count and not roi and minimum_distance == 0:
            logger.warning(
                "You are using all nodes of a skeleton without any ROI filter! Is that what you meant?"
            )

    rng = default_rng(random_seed)

    if tbars:
        logger.info(f"Fetching synapses for body {body}")
        syn_df = fetch_annotation_label(*input_seg[:2], 'synapses', body, format='pandas')
        tbars = syn_df.query('kind == "PreSyn"')[[*'zyx']]

        if roi:
            logger.info(f"Filtering tbars for roi {roi}")
            determine_point_rois(*input_seg[:2], [roi], tbars)
            tbars = tbars.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            logger.info(
                f"Pruning close points from {len(tbars)} total tbar points")
            tbars = prune_close_pairs(tbars, minimum_distance, rng)
            logger.info(f"After pruning, {len(tbars)} tbars remain.")

        if count:
            count = min(count, len(tbars))
            logger.info(f"Sampling {count} tbars")
            choices = rng.choice(tbars.index, size=count, replace=False)
            tbars = tbars.loc[choices]

        logger.info(f"Returning {len(tbars)} tbar points")
        return tbars

    elif use_skeleton:
        assert body
        logger.info(f"Fetching skeleton for body {body}")
        skeleton_instance = f'{input_seg[2]}_skeletons'
        swc = fetch_key(*input_seg[:2], skeleton_instance, f'{body}_swc')
        skeleton_df = swc_to_dataframe(swc)
        skeleton_df['x'] = skeleton_df['x'].astype(int)
        skeleton_df['y'] = skeleton_df['y'].astype(int)
        skeleton_df['z'] = skeleton_df['z'].astype(int)

        if roi:
            logger.info(f"Filtering skeleton for roi {roi}")
            determine_point_rois(*input_seg[:2], [roi], skeleton_df)
            skeleton_df = skeleton_df.query('roi == @roi')[[*'zyx']]

        if minimum_distance:
            assert count
            # Distance-pruning is very expensive on a huge number of close points.
            # If skeleton is large, first reduce the workload by pre-selecting a
            # random sample of skeleton points, and prune more from there.
            if len(skeleton_df) > 10_000:
                # FIXME: random_state can't use rng until I upgrade to pandas 1.0
                skeleton_df = skeleton_df.sample(min(4 * count, len(skeleton_df)),
                                                 random_state=None)
            logger.info(
                f"Pruning close points from {len(skeleton_df)} skeleton points"
            )
            # Keep the returned frame (as the tbar branch does); the result
            # was previously discarded here.
            skeleton_df = prune_close_pairs(skeleton_df, minimum_distance, rng)
            logger.info(
                f"After pruning, {len(skeleton_df)} skeleton points remain.")

        if count:
            count = min(count, len(skeleton_df))
            logger.info(f"Sampling {count} skeleton points")
            choices = rng.choice(skeleton_df.index, size=count, replace=False)
            skeleton_df = skeleton_df.loc[choices]

        logger.info(f"Returning {len(skeleton_df)} skeleton points")
        return skeleton_df

    elif body:
        assert count
        if roi:
            # TODO: intersect the ranges with the ROI.
            raise NotImplementedError(
                "Sorry, I haven't yet implemented support for "
                "body+roi filtering.  Pick one or the other, "
                "or ask Stuart to fix this.")

        logger.info(f"Fetching sparsevol for body {body}")
        ranges = fetch_sparsevol(*input_seg, body, format='ranges')
        logger.info("Sampling from sparsevol")

        if minimum_distance > 0:
            # Sample 4x extra so we still have enough after pruning.
            points = sample_points_from_ranges(ranges, 4 * count, rng)
        else:
            points = sample_points_from_ranges(ranges, count, rng)

        points = pd.DataFrame(points, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} body points")
            points = prune_close_pairs(points, minimum_distance, rng)
            logger.info(f"After pruning, {len(points)} body points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} body points")
        return points

    elif roi:
        assert count
        logger.info(f"Fetching roi {roi}")
        roi_ranges = fetch_roi_roi(*input_seg[:2], roi, format='ranges')
        logger.info("Sampling from ranges")

        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s5 = sample_points_from_ranges(roi_ranges, 4 * count, rng)
        else:
            points_s5 = sample_points_from_ranges(roi_ranges, count, rng)

        # Each sampled scale-5 coordinate is multiplied by 2**5 to get a
        # corner; a random point is then drawn from the 32^3 cube at that corner.
        corners_s0 = points_s5 * (2**5)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**5))
        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(f"Pruning close points from {len(points)} roi points")
            points = prune_close_pairs(points, minimum_distance, rng)
            logger.info(
                f"After pruning, points from {len(points)} roi points remain")

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} roi points")
        return points

    else:
        # No body or roi specified, just sample from the whole non-zero segmentation area
        assert count
        logger.info("Sampling random points from entire input segmentation")
        logger.info("Fetching low-res input volume")
        box_s6 = round_box(fetch_volume_box(*input_seg), 2**6, 'out') // 2**6
        seg_s6 = fetch_labelmap_voxels(*input_seg, box_s6, scale=6)
        mask_s6 = seg_s6.astype(bool)
        logger.info("Encoding segmentation as ranges")
        seg_ranges = runlength_encode_mask_to_ranges(mask_s6, box_s6)
        logger.info("Sampling from ranges")

        if minimum_distance > 0:
            # Sample 4x extra so we can prune some out if necessary.
            points_s6 = sample_points_from_ranges(seg_ranges, 4 * count, rng)
        else:
            points_s6 = sample_points_from_ranges(seg_ranges, count, rng)

        # Same corner-then-random-offset scheme as the ROI branch,
        # with a factor of 2**6 (64^3 cubes).
        corners_s0 = points_s6 * (2**6)
        points_s0 = rng.integers(corners_s0, corners_s0 + (2**6))
        points = pd.DataFrame(points_s0, columns=[*'zyx'])

        if minimum_distance > 0:
            logger.info(
                f"Pruning close points from {len(points)} segmentation points")
            points = prune_close_pairs(points, minimum_distance, rng)
            logger.info(
                f"After pruning, points from {len(points)} segmentation points remain"
            )

        points = points.iloc[:count]
        logger.info(f"Returning {len(points)} segmentation points")
        return points
def load_roi_label_volume(server, uuid, rois_or_neuprint, box_s5=None, export_path=None, export_labelmap=None):
    """
    Fetch several ROIs from DVID and combine them into a single label volume or mask.
    The label values in the returned volume correspond to the order in which the ROI
    names were passed in, starting at label 1.

    This function is essentially a convenience function around fetch_combined_roi_volume(),
    but in this case it will optionally auto-fetch the ROI list, and auto-export the volume.

    Args:
        server:
            DVID server

        uuid:
            DVID uuid

        rois_or_neuprint:
            Either a list of ROIs or a neuprint server (address or client)
            from which to obtain the roi list. ROI names obtained from
            neuprint are sorted.

        box_s5:
            If you want to restrict the ROIs to a particular subregion,
            you may pass your own bounding box (at scale 5).
            Alternatively, you may pass the name of a segmentation
            instance from DVID whose bounding box will be used.
            If omitted, ``[None, None]`` is passed to
            fetch_combined_roi_volume().

        export_path:
            If you want the ROI volume to be exported to disk,
            provide a path name ending with .npy or .h5.
            Paths with any other extension are not written.

        export_labelmap:
            If you want the ROI volume to be exported to a DVID labelmap instance,
            Provide the instance name, or a tuple of (server, uuid, instance).
            In this case box_s5 must be a block-aligned numpy array.

    Returns:
        (roi_vol, roi_box, rois), containing the fetched label volume, the
        bounding box it corresponds to (in DVID scale-5 coordinates), and the
        list of ROI names that were used.

    Note:
      If you have a list of (full-res) points to extract from the returned volume,
      pass a DataFrame with columns ['z','y','x'] to the following function.
      If you already downloaded the roi_vol (above), provide it.
      Otherwise, leave out those args and it will be fetched first.
      Adds columns to the input DF (in-place) for 'roi' (str) and 'roi_label' (int).

      >>> from neuclease.dvid import determine_point_rois
      >>> determine_point_rois(*master, rois, point_df, roi_vol, roi_box)
    """
    if box_s5 is None:
        # A fresh list per call, instead of a shared mutable default argument.
        box_s5 = [None, None]

    if isinstance(box_s5, str):
        # Assume that this is a segmentation instance whose dimensions should be used
        # Fetch the maximum extents of the segmentation,
        # and rescale it for scale-5.
        seg_box = fetch_volume_box(server, uuid, box_s5)
        box_s5 = round_box(seg_box, (2**5), 'out') // 2**5
        box_s5[0] = (0, 0, 0)

    if export_labelmap:
        assert isinstance(box_s5, np.ndarray)
        assert not (box_s5 % 64).any(), \
            ("If exporting to a labelmap instance, please supply "
             "an explicit box and make sure it is block-aligned.")

    if isinstance(rois_or_neuprint, (str, neuprint.Client)):
        if isinstance(rois_or_neuprint, str):
            npclient = neuprint.Client(rois_or_neuprint)
        else:
            npclient = rois_or_neuprint

        # Fetch ROI names from neuprint
        q = "MATCH (m: Meta) RETURN m.superLevelRois as rois"
        rois = npclient.fetch_custom(q)['rois'].iloc[0]
        rois = sorted(rois)
        # # Remove '.*ACA' ROIs. Apparently there is some
        # # problem with them. (They overlap with other ROIs.)
        # rois = [*filter(lambda r: 'ACA' not in r, rois)]
    else:
        assert isinstance(rois_or_neuprint, collections.abc.Iterable)
        rois = rois_or_neuprint

    # Fetch each ROI and write it into a volume
    with Timer(f"Fetching combined ROI volume for {len(rois)} ROIs", logger):
        roi_vol, roi_box, overlap_stats = fetch_combined_roi_volume(
            server, uuid, rois, box_zyx=box_s5)

    if len(overlap_stats) > 0:
        # Logger.warn() is a deprecated alias; use warning().
        logger.warning(
            f"Some ROIs overlap! Here's an incomplete list of overlapping pairs:\n{overlap_stats}"
        )

    # Export to npy/h5py for external use
    if export_path:
        with Timer(f"Exporting to {export_path}", logger):
            if export_path.endswith('.npy'):
                np.save(export_path, roi_vol)
            elif export_path.endswith('.h5'):
                with h5py.File(export_path, 'w') as f:
                    f.create_dataset('rois_scale_5', data=roi_vol, chunks=True)

    if export_labelmap:
        if isinstance(export_labelmap, str):
            export_labelmap = (server, uuid, export_labelmap)

        assert len(export_labelmap) == 3
        with Timer(f"Exporting to {export_labelmap[2]}", logger):
            # Look for the instance in the repo we are about to write to.
            # export_labelmap may name a different server/uuid than the one
            # the ROIs were read from.
            if export_labelmap[2] not in fetch_repo_instances(*export_labelmap[:2], 'labelmap'):
                create_labelmap_instance(
                    *export_labelmap, voxel_size=8 * (2**5),
                    max_scale=6)  # FIXME: hard-coded voxel size

            # It's really important to use this block shape.
            # See https://github.com/janelia-flyem/dvid/issues/342
            boxes = boxes_from_grid(roi_box, (256, 256, 256), clipped=True)
            for box in tqdm_proxy(boxes):
                # Index roi_vol relative to its own start corner.
                block = extract_subvol(roi_vol, box - roi_box[0])
                post_labelmap_voxels(*export_labelmap, box[0], block, scale=0, downres=True)

    return roi_vol, roi_box, rois