def extract_body_ids_and_launch(c, args, seg_instance, body_csv, msgs_df):
    """
    Determine which bodies were touched by the given kafka messages,
    overwrite the body-list CSV in the workflow template directory with
    them, and submit a cluster job that launches the workflow.

    Args:
        c:
            Passed through to run_cmd(), which executes the submission command.
        args:
            Object providing the attributes ``template_dir``, ``conda_path``,
            ``conda_env``, ``cwd``, ``driver_slots`` and ``worker_slots``.
        seg_instance:
            Triple of (server, uuid, instance).  The uuid is passed
            through resolve_ref() before it is used.
        body_csv:
            File name (relative to ``args.template_dir``) of the CSV to overwrite.
        msgs_df:
            DataFrame with (at least) the columns 'msg' and 'timestamp'.

    Returns:
        False if there were no messages, or if no bodies remain after
        removing the excluded ones (nothing is written or launched).
        True after the job submission command has been run.
    """
    # Late imports so --help works quickly
    import numpy as np
    import pandas as pd
    from neuclease.dvid import resolve_ref, fetch_mapping, compute_affected_bodies

    if len(msgs_df) == 0:
        return False

    exclude_bodies = load_bad_bodies()

    # The uuid may have been given as a branch;
    # pin it down to a specific uuid before querying the server.
    server, uuid, instance = seg_instance
    uuid = resolve_ref(server, uuid)

    # Everything (bodies and supervoxels) touched according to the kafka log.
    affected = compute_affected_bodies(msgs_df['msg'])
    new_bodies, changed_bodies, _removed_bodies, new_supervoxels = affected

    # Touched supervoxels are translated to the bodies they map to.
    # Label 0 is not a body.
    sv_split_bodies = set(fetch_mapping(server, uuid, instance, new_supervoxels))
    sv_split_bodies.discard(0)

    touched = {*new_bodies, *changed_bodies, *sv_split_bodies}
    touched.difference_update(exclude_bodies)

    body_ids = np.sort(np.fromiter(touched, np.uint64)).tolist()
    if not body_ids:
        return False

    # Overwrite the CSV file for the workflow's subset-bodies set.
    csv_path = f'{args.template_dir}/{body_csv}'
    pd.Series(body_ids, name='body').to_csv(csv_path, header=True, index=False)

    timestamps = msgs_df['timestamp']
    first_timestamp = timestamps.iloc[0]
    last_timestamp = timestamps.iloc[-1]
    logger.info(f"Launching mesh computation for {len(body_ids)} bodies, "
                f"modified between [{first_timestamp}] and [{last_timestamp}]")

    # FIXME: Instead of hard-coding -W to one hour, read the template dask-config.yaml
    steps = [
        f"source $({args.conda_path} info --base)/bin/activate {args.conda_env}",
        f"cd {args.cwd}",
        f"bsub -W 01:00 -n {args.driver_slots} -o /dev/null launchflow -n {args.worker_slots} {args.template_dir}",
    ]
    run_cmd(c, " && ".join(steps))
    return True
def _sanitize_config(self):
    """
    Normalize the config in place and perform simple sanity checks:

    - The input DVID uuid is replaced by the result of resolve_ref()
      (e.g. 'master' -> abc123).
    - The 'bodies' setting of the 'decimatemeshes' section must be non-empty.
    - If 'bodies' is a string ending in '.csv', that file must exist.

    Raises:
        AssertionError if one of the checks fails.
    """
    # Resolve uuid if necessary (e.g. 'master' -> abc123)
    dvid_cfg = self.config["input"]["dvid"]
    dvid_cfg["uuid"] = resolve_ref(dvid_cfg["server"], dvid_cfg["uuid"])

    # Check the input body list (the path itself is left untouched).
    bodies = self.config["decimatemeshes"]["bodies"]
    assert bodies, "No input body list provided"

    names_csv_file = isinstance(bodies, str) and bodies.endswith(".csv")
    if names_csv_file:
        assert os.path.exists(bodies), f'Input file does not exist: {bodies}'
def export_sparsevol(server, uuid, instance, neurons_df, scale=5, format='tiff', output_dir='.'):
    """
    For each group of bodies listed in ``neurons_df``, combine the bodies'
    sparsevol masks into one binary mask volume and write that volume to
    disk as a stack of per-slice image files.

    Args:
        server, uuid, instance:
            Location of the segmentation.  The uuid is passed through
            resolve_ref() before use.
        neurons_df:
            DataFrame with (at least) the columns 'group' and 'body'.
        scale:
            Scale at which the masks are fetched.  The mask volume's shape is
            the segmentation's bounding box (rounded out to multiples of 64,
            with its start forced to the origin) divided by ``2**scale``.
        format:
            File extension used for the slice files.
        output_dir:
            Directory in which one ``<group>.stack`` directory per group is created.
    """
    import os
    import vigra
    import numpy as np
    from neuclease.util import round_box, tqdm_proxy
    from neuclease.dvid import fetch_sparsevol, resolve_ref, fetch_volume_box, box_to_slicing

    uuid = resolve_ref(server, uuid)
    seg = (server, uuid, instance)

    # The mask shape is derived from the segmentation
    # bounding box at the requested scale.
    box = round_box(fetch_volume_box(*seg), 64, 'out')
    box[0] = (0, 0, 0)
    box_scaled = box // 2**scale
    mask_shape = box_scaled[1]

    # Zero-padding width for the slice index in each file name.
    digits = int(np.ceil(np.log10(mask_shape[0])))

    # One mask stack per group, in order of first appearance.
    num_groups = neurons_df['group'].nunique()
    group_prog = tqdm_proxy(neurons_df.groupby('group', sort=False), total=num_groups)
    for group, group_df in group_prog:
        group_prog.write(f'Group "{group}": Assembling mask')
        group_mask = vigra.taggedView(np.zeros(mask_shape, dtype=bool), 'zyx')

        # Overlay each body mask in the current group
        for body in tqdm_proxy(group_df['body'], leave=False):
            body_mask, mask_box = fetch_sparsevol(*seg, body, scale=scale, format='mask')
            group_mask[box_to_slicing(*mask_box)] |= body_mask

        # Write out the slice files
        group_prog.write(f'Group "{group}": Writing slices')
        stack_dir = f'{output_dir}/{group}.stack'
        os.makedirs(stack_dir, exist_ok=True)

        for z in tqdm_proxy(range(group_mask.shape[0]), leave=False):
            slice_path = f'{stack_dir}/{z:0{digits}d}.{format}'
            vigra.impex.writeImage(group_mask[z].astype(np.uint8), slice_path)
def _prepare_output(self):
    """
    If necessary, create the output directory or DVID instance
    so that meshes can be written to it.

    For directory output, the configured path is made absolute (in the
    config itself) and the directory is created.

    For DVID output (keyvalue or tarsupervoxels), blank 'server' / 'uuid'
    settings are filled in from the input 'dvid' settings, and the instance
    is created if it doesn't exist and 'create-if-necessary' is set.

    Raises:
        RuntimeError:
            - if the output node is locked and neither the server's mode nor
              a DVID_ADMIN_TOKEN environment variable permits writing to it
            - if a tarsupervoxels output is requested for an input that
              is not labelmap supervoxels
            - if the output instance doesn't exist and may not be created
            - if no usable 'sync-to' instance can be determined for a new
              tarsupervoxels instance
    """
    input_cfg = self.config["input"]
    output_cfg = self.config["output"]
    options = self.config["svdecimate"]

    ## directory output
    if 'directory' in output_cfg:
        # Convert to absolute so we can chdir with impunity later.
        output_cfg['directory'] = os.path.abspath(output_cfg['directory'])
        os.makedirs(output_cfg['directory'], exist_ok=True)
        return

    ##
    ## DVID output (either keyvalue or tarsupervoxels)
    ##
    (instance_type,) = output_cfg.keys()
    instance_cfg = output_cfg[instance_type]

    server = instance_cfg['server']
    uuid = instance_cfg['uuid']
    instance = instance_cfg['instance']

    # If the output server or uuid is left blank,
    # we assume it should be auto-filled from the input settings.
    if server == "" or uuid == "":
        assert "dvid" in input_cfg
        if server == "":
            instance_cfg['server'] = input_cfg["dvid"]["server"]
        if uuid == "":
            instance_cfg['uuid'] = input_cfg["dvid"]["uuid"]

    # Resolve in case a branch was given instead of a specific uuid.
    # (The config keeps the unresolved value.)
    server = instance_cfg['server']
    uuid = resolve_ref(server, instance_cfg['uuid'])

    if is_locked(server, uuid):
        info = fetch_server_info(server)
        if "Mode" in info and info["Mode"] == "allow writes on committed nodes":
            logger.warning(f"Output is a locked node ({uuid}), but server is in full-write mode. Proceeding.")
        elif os.environ.get("DVID_ADMIN_TOKEN", ""):
            logger.warning(f"Output is a locked node ({uuid}), but you defined DVID_ADMIN_TOKEN. Proceeding.")
        else:
            raise RuntimeError(f"Can't write to node {uuid} because it is locked.")

    if instance_type == 'tarsupervoxels' and not self.input_is_labelmap_supervoxels():
        msg = ("You shouldn't write to a tarsupervoxels instance unless "
               "you're reading supervoxels from a labelmap input.\n"
               "Use a labelmap input source, and set supervoxels: true")
        raise RuntimeError(msg)

    existing_instances = fetch_repo_instances(server, uuid)
    if instance in existing_instances:
        # Instance exists -- nothing to do.
        return

    if not instance_cfg['create-if-necessary']:
        msg = (f"Output instance '{instance}' does not exist, "
               "and your config did not specify create-if-necessary")
        raise RuntimeError(msg)

    assert instance_type in ('tarsupervoxels', 'keyvalue')

    ## keyvalue output
    if instance_type == "keyvalue":
        create_instance(server, uuid, instance, "keyvalue", tags=["type=meshes"])
        return

    ## tarsupervoxels output
    sync_instance = output_cfg["tarsupervoxels"]["sync-to"]

    if not sync_instance:
        # Auto-fill a default 'sync-to' instance using the input segmentation, if possible.
        info = fetch_instance_info(*[input_cfg["dvid"][k] for k in ("server", "uuid", "tarsupervoxels-instance")])
        syncs = info['Base']['Syncs']
        if syncs:
            sync_instance = syncs[0]

    if not sync_instance:
        msg = ("Can't create a tarsupervoxels instance unless "
               "you specify a 'sync-to' labelmap instance name.")
        raise RuntimeError(msg)

    if sync_instance not in existing_instances:
        # This must be an f-string, otherwise the placeholder is shown literally.
        msg = (f"Can't sync to labelmap instance '{sync_instance}': "
               "it doesn't exist on the output server.")
        raise RuntimeError(msg)

    create_tarsupervoxel_instance(server, uuid, instance, sync_instance, options["format"])
def __init__(self, volume_config, resource_manager_client=None):
    """
    Initialize the service from a volume config.

    Args:
        volume_config:
            Config dict, validated against DvidGenericVolumeSchema (with
            defaults injected).  It is modified in place: in the 'geometry'
            section, 'block-width', 'bounding-box' and 'message-block-shape'
            are overwritten with the values actually used.
        resource_manager_client:
            Optional.  If None, a dummy ``ResourceManagerClient("", 0)`` is used.

    Raises:
        RuntimeError:
            If the instance doesn't exist on the server and the config
            doesn't specify 'create-if-necessary'.
        HTTPError:
            If fetching the instance info failed for any other reason.
        AssertionError:
            For inconsistent config settings.

    Note:
        If 'create-if-necessary' is set, ``self._create_instance()`` is
        called at the end of initialization.
    """
    validate(volume_config, DvidGenericVolumeSchema, inject_defaults=True)

    dvid_cfg = volume_config["dvid"]
    geometry_cfg = volume_config["geometry"]

    assert 'apply-labelmap' not in dvid_cfg, \
        ("The apply-labelmap section should be in the 'adapters' section, (parallel to 'dvid' and 'geometry'), "
         "not nested within the 'dvid' section!")

    ##
    ## server, uuid
    ##
    ## Note:
    ##   self.uuid will be resolved, but volume_config["dvid"]["uuid"]
    ##   will not be overwritten. It will remain unresolved.
    ##
    self._server = dvid_cfg["server"]
    self._uuid = resolve_ref(dvid_cfg["server"], dvid_cfg["uuid"])
    self._throttle = dvid_cfg["accept-throttling"]

    ##
    ## instance, dtype, etc.
    ##
    config_block_width = geometry_cfg["block-width"]

    assert ('segmentation-name' in dvid_cfg) ^ ('grayscale-name' in dvid_cfg), \
        "Config error: Specify either segmentation-name or grayscale-name (not both)"

    if "segmentation-name" in dvid_cfg:
        self._instance_name = dvid_cfg["segmentation-name"]
        self._dtype = np.uint64
    elif "grayscale-name" in dvid_cfg:
        self._instance_name = dvid_cfg["grayscale-name"]
        self._dtype = np.uint8

    self._dtype_nbytes = np.dtype(self._dtype).type().nbytes

    try:
        instance_info = fetch_instance_info(self._server, self._uuid, self._instance_name)
    except HTTPError as ex:
        # Only a 400 response is treated as a possibly-missing instance.
        if ex.response.status_code != 400:
            raise

        if not dvid_cfg["create-if-necessary"]:
            existing_instances = fetch_repo_instances(self._server, self._uuid)
            if self._instance_name not in existing_instances:
                raise RuntimeError(
                    f"Instance '{self._instance_name}' does not exist in {self._server} / {self._uuid}. "
                    "Add 'create-if-necessary: true' to your config if you want it to be created."
                ) from ex
            # The instance exists, so the error had some other cause.
            raise

        # Instance doesn't exist yet -- we are going to create it.
        if "segmentation-name" in dvid_cfg:
            # get_voxels doesn't really care if it's labelarray or labelmap...
            self._instance_type = 'labelmap'
            self._is_labels = True
        else:
            self._instance_type = 'uint8blk'
            self._is_labels = False

        block_width = config_block_width
    else:
        self._instance_type = instance_info["Base"]["TypeName"]
        self._is_labels = self._instance_type in ('labelblk', 'labelarray', 'labelmap')
        if (self._instance_type == "googlevoxels"
                and instance_info["Extended"]["Scales"][0]["channelType"] == "UINT64"):
            self._is_labels = True

        bs_x, bs_y, bs_z = instance_info["Extended"]["BlockSize"]
        assert (bs_x == bs_y == bs_z), "Expected blocks to be cubes."
        block_width = bs_x

    def _dvid_setting(key):
        # Use the configured value if present; otherwise fall back to the
        # default declared in the segmentation schema.
        if key in dvid_cfg:
            return dvid_cfg[key]
        return DvidSegmentationServiceSchema["properties"][key]["default"]

    self.disable_indexing = _dvid_setting("disable-indexing")
    self.enable_downres = _dvid_setting("enable-downres")
    self.gzip_level = _dvid_setting("gzip-level")

    # Whether or not to read the supervoxels from the labelmap instance instead of agglomerated labels.
    self.supervoxels = ("supervoxels" in dvid_cfg) and (dvid_cfg["supervoxels"])

    ##
    ## default block width
    ##
    assert config_block_width in (-1, block_width), \
        f"DVID volume block-width ({config_block_width}) from config does not match server metadata ({block_width})"
    if block_width == -1:
        # No block-width specified; choose default
        block_width = 64

    ##
    ## bounding-box
    ##
    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    try:
        stored_extents = fetch_volume_box(self._server, self.uuid, self._instance_name)
    except HTTPError:
        assert -1 not in bounding_box_zyx.flat[:], \
            f"Instance '{self._instance_name}' does not yet exist on the server, "\
            "so your volume_config must specify explicit values for bounding-box"
    else:
        if stored_extents is not None and stored_extents.any():
            replace_default_entries(bounding_box_zyx, stored_extents)

    ##
    ## message-block-shape
    ##
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx, [block_width, block_width, 100 * block_width])

    ##
    ## available-scales
    ##
    available_scales = list(geometry_cfg["available-scales"])

    ##
    ## resource_manager_client
    ##
    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    ##
    ## Special setting to override resource manager for sparse coords
    ## (Grayscale doesn't have this setting.)
    ##
    use_resource_manager_for_sparse_coords = dvid_cfg.get("use-resource-manager-for-sparse-coords", False)

    ##
    ## Store members
    ##
    self._resource_manager_client = resource_manager_client
    self._block_width = block_width
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._available_scales = available_scales
    self._use_resource_manager_for_sparse_coords = use_resource_manager_for_sparse_coords
    self.write_empty_blocks = dvid_cfg["write-empty-blocks"]

    ##
    ## Overwrite config entries that we might have modified
    ##
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()

    # TODO: Check the server for available scales and overwrite in the config?
    #volume_config["geometry"]["available-scales"] = [0]

    if dvid_cfg["create-if-necessary"]:
        self._create_instance(volume_config)
def execute(self):
    """
    Compute per-body, per-ROI voxel counts for the configured
    'subset-bodies' and write them to disk.

    Steps:
        - Fill in blank 'roi-server' / 'roi-uuid' settings from the input
          service (which must then be a DvidVolumeService) and resolve the uuid.
        - Fetch the combined ROI volume and determine which bricks intersect
          the ROIs (and, if the input service supports it, the listed bodies).
        - Process the bricks in batches via ``self._execute_batch()``.
        - Sum the voxel counts per (body, roi_id) and attach the ROI names.

    Files written to the current directory:
        - brick-coords.npy
        - roi-stats.npy
        - roi-stats.csv

    Raises:
        AssertionError:
            If no bodies were provided, or if the bounding box or the
            preferred message shape is not a multiple of 32.
    """
    self._init_service()
    options = self.config["roistats"]

    if not options["roi-server"]:
        assert isinstance(self.input_service, DvidVolumeService)
        options["roi-server"] = self.input_service.base_service.server

    if not options["roi-uuid"]:
        assert isinstance(self.input_service, DvidVolumeService)
        options["roi-uuid"] = self.input_service.base_service.uuid

    options["roi-uuid"] = resolve_ref(options["roi-server"], options["roi-uuid"])

    is_supervoxels = (isinstance(self.input_service, DvidVolumeService)
                      and self.input_service.base_service.supervoxels)  # noqa
    bodies = load_body_list(options["subset-bodies"], is_supervoxels)
    assert len(bodies) > 0, "Please provide a list of subset-bodies to process"

    scale = options["analysis-scale"]

    bounding_box = self.input_service.bounding_box_zyx
    assert not (bounding_box % 2**5).any(), \
        "Make sure your configured bounding box is divisible by 32px at scale 0."

    brick_shape = self.input_service.preferred_message_shape
    assert not (brick_shape % 2**5).any(), \
        "Make sure your preferred message shape divides into 32px blocks at scale 0"

    with Timer("Fetching ROI volume", logger):
        roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
            options["roi-server"], options["roi-uuid"], options["rois"], False, bounding_box // 2**5)

    if len(overlaps) > 0:
        logger.warning(f"Some of your ROIs overlap!  Here's an incomplete list:\n{overlaps}")

    with Timer("Determining brick set", logger):
        # Determine which bricks intersect our ROIs
        roi_brick_shape = brick_shape // 2**5
        roi_brick_boxes = boxes_from_mask((roi_vol_s5 != 0), roi_box_s5[0], roi_brick_shape, clipped=False)
        roi_brick_boxes *= 2**5
        roi_brick_boxes = box_intersection(roi_brick_boxes, bounding_box)

        # Non-intersecting boxes have negative shape -- drop them.
        brick_shapes = roi_brick_boxes[:, 1, :] - roi_brick_boxes[:, 0, :]
        roi_brick_boxes = roi_brick_boxes[(brick_shapes > 0).all(axis=1)]
        roi_brick_coords_df = pd.DataFrame(roi_brick_boxes[:, 0, :], columns=[*'zyx'])

        try:
            body_brick_coords_df = self.input_service.sparse_brick_coords_for_labels(bodies)
        except NotImplementedError:
            # Use all bricks in the ROIs, and use the special label -1 to
            # indicate that all bodies in the list might be found there.
            # (See below.)
            brick_coords_df = roi_brick_coords_df
            brick_coords_df['label'] = -1
        else:
            brick_coords_df = body_brick_coords_df.merge(roi_brick_coords_df, 'inner', on=[*'zyx'])

        assert brick_coords_df.columns.tolist() == [*'zyx', 'label']
        np.save('brick-coords.npy', brick_coords_df.to_records(index=False))

    with Timer("Preparing bricks", logger):
        boxes_and_roi_bricks = []

        # One tuple of labels per brick coordinate.
        # Note: Series.items() is used because Series.iteritems()
        #       no longer exists in pandas >= 2.0.
        labels_per_brick = brick_coords_df.groupby([*'zyx'])['label'].agg(tuple)
        for coord, brick_labels in labels_per_brick.items():
            if brick_labels == (-1,):
                # No sparse body brick locations were found above.
                # Search for all bodies in all bricks.
                brick_labels = bodies

            box = np.array((coord, coord))
            box[1] += brick_shape
            box = box_intersection(box, bounding_box)

            # Location of this brick within the (scale-5) ROI volume
            roi_brick_box = ((box // 2**5) - roi_box_s5[0])
            roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
            boxes_and_roi_bricks.append((box, roi_brick_s5, brick_labels))

    scaled_shape = brick_shape // (2**scale)
    logger.info(
        f"Prepared {len(boxes_and_roi_bricks)} bricks of scale-0 shape "
        f"{(*brick_shape[::-1],)} ({(*scaled_shape[::-1],)} at scale-{scale})"
    )

    all_stats = []
    batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
    logger.info(f"Processing {len(batches)} batches")
    for i, batch_boxes_and_bricks in enumerate(batches):
        with Timer(f"Batch {i:02d}", logger):
            batch_stats = self._execute_batch(scale, batch_boxes_and_bricks)
            all_stats.append(batch_stats)

    # A body/ROI pair can occur in many bricks; combine the per-brick counts.
    all_stats = pd.concat(all_stats, ignore_index=True)
    all_stats = all_stats.groupby(['body', 'roi_id'], as_index=False)['voxels'].sum()

    # roi_id 0 stands for "<none>"; the configured ROIs are numbered from 1, in order.
    roi_names = pd.Series(["<none>", *options["rois"]], name='roi')
    roi_names.index.name = 'roi_id'
    all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
    all_stats = all_stats.sort_values(['body', 'roi_id'])

    if scale > 0:
        # Make it explicit that the counts are not scale-0 voxels.
        all_stats.rename(columns={'voxels': f'voxels_s{scale}'}, inplace=True)

    with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
        np.save('roi-stats.npy', all_stats.to_records(index=False))
        all_stats.to_csv('roi-stats.csv', index=False, header=True)
def neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, body_id, scale=0, min_size=0, search_radius=50, processes=1):
    """
    Compute a table of statistics for the mitochondria of a single body.

    The body's blocks are processed independently (via ``_process_block()``,
    optionally in parallel), and the per-block results are combined into one
    row per mito, including a size-weighted centroid.  If a centroid does not
    lie on its own mito, it is replaced by the closest voxel of that mito
    within ``search_radius`` (if there is one).

    Args:
        seg_src, mito_cc_src, mito_class_src:
            Sequences whose first two elements are (server, uuid); they are
            unpacked into the neuclease fetch functions.
            (Presumably (server, uuid, instance) -- TODO confirm.)
            The uuid is resolved via resolve_ref().  The caller's
            sequences are not modified; lists and tuples are both accepted.
        body_id:
            The body to process.
        scale:
            Scale at which to perform the analysis.
        min_size:
            Mitos whose 'total_size' is smaller than this are dropped.
        search_radius:
            Half-width (in voxels at the analysis scale) of the cube that is
            searched when a centroid must be adjusted.
        processes:
            Passed to compute_parallel().

    Returns:
        DataFrame indexed by 'mito_id', with columns
        ['body', 'x', 'y', 'z', 'total_size', <class columns>, 'centroid_type'],
        where 'centroid_type' is one of:
            - 'exact': the centroid lies on the mito
            - 'adjusted': the centroid was moved onto the mito
            - 'error': no voxel of the mito was found within the search
              radius; the original centroid is kept
    """
    from functools import partial
    import numpy as np
    import pandas as pd
    from neuclease.util import compute_parallel
    from neuclease.dvid import fetch_sparsevol_coarse, resolve_ref, fetch_labels, fetch_labelmap_voxels

    # Work on copies, so the caller's sequences aren't modified
    # (and so tuples are acceptable inputs).
    seg_src = [*seg_src]
    mito_cc_src = [*mito_cc_src]
    mito_class_src = [*mito_class_src]

    seg_src[1] = resolve_ref(*seg_src[:2])
    mito_cc_src[1] = resolve_ref(*mito_cc_src[:2])
    mito_class_src[1] = resolve_ref(*mito_class_src[:2])

    # Fetch block coords; re-scale for the analysis scale
    block_coords = (2**6) * fetch_sparsevol_coarse(*seg_src, body_id)
    bc_df = pd.DataFrame(block_coords, columns=[*'zyx'])
    bc_df[[*'zyx']] //= 2**scale
    block_coords = bc_df.drop_duplicates().values

    #
    # Blockwise stats
    #
    block_fn = partial(_process_block, seg_src, mito_cc_src, mito_class_src, body_id, scale)
    block_tables = compute_parallel(block_fn, block_coords, processes=processes)
    block_tables = [t for t in block_tables if t is not None]

    #
    # Combine stats
    #
    full_table = pd.concat(block_tables, sort=True).fillna(0)
    class_cols = [c for c in full_table.columns if c.startswith('class')]
    full_table = full_table.astype({c: np.int32 for c in class_cols})

    # Weight each block centroid by the block's voxel count before taking the mean
    full_table[[*'zyx']] *= full_table[['total_size']].values
    stats_df = full_table.groupby('mito_id').sum()
    stats_df[[*'zyx']] /= stats_df[['total_size']].values

    # Drop tiny mitos
    stats_df = stats_df.query("total_size >= @min_size").copy()

    # Assume all centroids are 'exact' by default (overwritten below if necessary)
    stats_df['centroid_type'] = 'exact'

    # Include a column for 'body' even though it's the same on every row,
    # just as a convenience for concatenating these results with the results
    # from other bodies if desired.
    stats_df['body'] = body_id

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    stats_df = stats_df[['body', *'xyz', 'total_size', *class_cols, 'centroid_type']]

    #
    # Check for centroids that fall outside of the mito,
    # and adjust them if necessary.
    #
    centroid_mitos = fetch_labels(*mito_cc_src, stats_df[[*'zyx']].values, scale=scale)
    mismatches = stats_df.index[(stats_df.index != centroid_mitos)]

    if len(mismatches) == 0:
        return stats_df

    logger.warning("Some mitochondria centroids do not lie within the mitochondria itself. "
                   "Searching for pseudo-centroids.")

    # construct field of distances from the central voxel
    sr = search_radius
    cz, cy, cx = np.ogrid[-sr:sr+1, -sr:sr+1, -sr:sr+1]
    distances = np.sqrt(cz**2 + cy**2 + cx**2)

    pseudo_centroids = []
    error_mito_ids = []
    for row in stats_df.loc[mismatches].itertuples():
        mito_id = row.Index
        centroid = np.array((row.z, row.y, row.x))

        # Cube of half-width sr, centered on the centroid
        box = (centroid - sr, 1 + centroid + sr)
        mito_mask = (mito_id == fetch_labelmap_voxels(*mito_cc_src, box, scale))

        if not mito_mask.any():
            # The mito isn't present within the search cube.
            # Keep the original centroid, but mark it as an error (below).
            pseudo_centroids.append((row.z, row.y, row.x))
            error_mito_ids.append(mito_id)
            continue

        # Find minimum distance
        masked_distances = np.where(mito_mask, distances, np.inf)
        new_centroid = np.unravel_index(np.argmin(masked_distances), masked_distances.shape)

        # Convert from cube-local coordinates to volume coordinates
        new_centroid = np.array(new_centroid) + centroid - sr
        pseudo_centroids.append(new_centroid)

    stats_df.loc[mismatches, ['z', 'y', 'x']] = np.array(pseudo_centroids, dtype=np.int32)
    stats_df.loc[mismatches, 'centroid_type'] = 'adjusted'
    stats_df.loc[error_mito_ids, 'centroid_type'] = 'error'

    if error_mito_ids:
        logger.warning("Some mitochondria pseudo-centroids could not be found.")

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    return stats_df