# ---- Example 1 ----
def extract_body_ids_and_launch(c, args, seg_instance, body_csv, msgs_df):
    """
    Extract the list of body IDs from the given Kafka messages,
    overwrite the body list CSV file in the workflow template directory,
    and submit a cluster job to launch the workflow.
    """
    # Late imports so --help works quickly
    from itertools import chain
    import numpy as np
    import pandas as pd
    from neuclease.dvid import resolve_ref, fetch_mapping, compute_affected_bodies

    if len(msgs_df) == 0:
        return False

    exclude_bodies = load_bad_bodies()

    # If the uuid was specified as a branch,
    # resolve it to a specific uuid now.
    server, uuid, instance = seg_instance
    uuid = resolve_ref(server, uuid)

    # Extract all bodies and supervoxels that have been touched in the Kafka log
    new_bodies, changed_bodies, _removed_bodies, new_supervoxels = compute_affected_bodies(
        msgs_df['msg'])

    # For touched supervoxels, we need to find their mapped bodies.
    sv_split_bodies = set(
        fetch_mapping(server, uuid, instance, new_supervoxels)) - set([0])

    subset_bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
    subset_bodies -= set(exclude_bodies)
    subset_bodies = np.fromiter(subset_bodies, np.uint64)
    subset_bodies = np.sort(subset_bodies).tolist()

    if len(subset_bodies) == 0:
        return False

    # Overwrite the CSV file for the workflow's subset-bodies set.
    pd.Series(subset_bodies,
              name='body').to_csv(f'{args.template_dir}/{body_csv}',
                                  header=True,
                                  index=False)

    first_timestamp = msgs_df['timestamp'].iloc[0]
    last_timestamp = msgs_df['timestamp'].iloc[-1]

    logger.info(f"Launching mesh computation for {len(subset_bodies)} bodies, "
                f"modified between [{first_timestamp}] and [{last_timestamp}]")

    # FIXME: Instead of hard-coding -W to one hour, read the template dask-config.yaml
    cmd = (
        f"source $({args.conda_path} info --base)/bin/activate {args.conda_env} "
        f"&& cd {args.cwd} "
        f"&& bsub -W 01:00 -n {args.driver_slots} -o /dev/null launchflow -n {args.worker_slots} {args.template_dir}"
    )

    run_cmd(c, cmd)
    return True
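
# A hedged, self-contained illustration of the subset-assembly step above
# (toy IDs, no DVID access; the real code gets these sets from
# compute_affected_bodies() and fetch_mapping()):
def _demo_subset_assembly():
    from itertools import chain
    import numpy as np
    new_bodies = {10, 11}
    changed_bodies = {11, 12}
    sv_split_bodies = {13, 0} - {0}    # body 0 (background) is discarded
    exclude_bodies = {12}              # stand-in for load_bad_bodies()
    subset = set(chain(new_bodies, changed_bodies, sv_split_bodies)) - exclude_bodies
    subset = np.sort(np.fromiter(subset, np.uint64)).tolist()
    assert subset == [10, 11, 13]


# ---- Example 2 ----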
    def _sanitize_config(self):
        """
        - Normalize/overwrite certain config values
        - Check for config mistakes
        - Simple sanity checks
        """
        # Resolve uuid if necessary (e.g. 'master' -> abc123)
        dvid_cfg = self.config["input"]["dvid"]
        dvid_cfg["uuid"] = resolve_ref(dvid_cfg["server"], dvid_cfg["uuid"])

        # Convert input/output CSV to absolute paths
        options = self.config["decimatemeshes"]
        assert options["bodies"], \
            "No input body list provided"

        if isinstance(options["bodies"], str) and options["bodies"].endswith(".csv"):
            assert os.path.exists(options["bodies"]), \
                f'Input file does not exist: {options["bodies"]}'
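
    # A hedged sketch of the absolute-path conversion described in the
    # docstring (an assumed continuation, not verbatim source):
    #
    #     options["bodies"] = os.path.abspath(options["bodies"])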

# ---- Example 3 ----

def export_sparsevol(server, uuid, instance, neurons_df, scale=5, format='tiff', output_dir='.'):
    import os
    import vigra
    import numpy as np

    from neuclease.util import round_box, tqdm_proxy
    from neuclease.dvid import fetch_sparsevol, resolve_ref, fetch_volume_box, box_to_slicing

    uuid = resolve_ref(server, uuid)

    # Determine the segmentation bounding box at the given scale,
    # which is used as the mask shape.
    seg = (server, uuid, instance)
    box = round_box(fetch_volume_box(*seg), 64, 'out')
    box[0] = (0,0,0)
    box_scaled = box // 2**scale

    # How many digits will we need in each slice file name?
    digits = int(np.ceil(np.log10(box_scaled[1, 0])))

    # Export a mask stack for each group.
    groups = neurons_df.groupby('group', sort=False)
    num_groups = neurons_df['group'].nunique()
    group_prog = tqdm_proxy(groups, total=num_groups)
    for group, df in group_prog:
        group_prog.write(f'Group "{group}": Assembling mask')
        group_mask = np.zeros(box_scaled[1], dtype=bool)
        group_mask = vigra.taggedView(group_mask, 'zyx')

        # Overlay each body mask in the current group
        for body in tqdm_proxy(df['body'], leave=False):
            body_mask, mask_box = fetch_sparsevol(*seg, body, scale=scale, format='mask')
            group_mask[box_to_slicing(*mask_box)] |= body_mask

        # Write out the slice files
        group_prog.write(f'Group "{group}": Writing slices')
        d = f'{output_dir}/{group}.stack'
        os.makedirs(d, exist_ok=True)
        for z in tqdm_proxy(range(group_mask.shape[0]), leave=False):
            p = f'{d}/{z:0{digits}d}.{format}'
            vigra.impex.writeImage(group_mask[z].astype(np.uint8), p)
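
# Hypothetical invocation (placeholder server/uuid/instance; neurons_df just
# needs 'group' and 'body' columns, as used above):
#
#   neurons_df = pd.DataFrame({'group': ['groupA', 'groupA', 'groupB'],
#                              'body': [1001, 1002, 2001]})
#   export_sparsevol('http://emdata:8900', 'master', 'segmentation',
#                    neurons_df, scale=5, format='png', output_dir='masks')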

# ---- Example 4 ----

    def _prepare_output(self):
        """
        If necessary, create the output directory or
        DVID instance so that meshes can be written to it.
        """
        input_cfg = self.config["input"]
        output_cfg = self.config["output"]
        options = self.config["svdecimate"]

        ## directory output
        if 'directory' in output_cfg:
            # Convert to absolute so we can chdir with impunity later.
            output_cfg['directory'] = os.path.abspath(output_cfg['directory'])
            os.makedirs(output_cfg['directory'], exist_ok=True)
            return

        ##
        ## DVID output (either keyvalue or tarsupervoxels)
        ##
        (instance_type,) = output_cfg.keys()

        server = output_cfg[instance_type]['server']
        uuid = output_cfg[instance_type]['uuid']
        instance = output_cfg[instance_type]['instance']

        # If the output server or uuid is left blank,
        # we assume it should be auto-filled from the input settings.
        if server == "" or uuid == "":
            assert "dvid" in input_cfg
            if server == "":
                output_cfg[instance_type]['server'] = input_cfg["dvid"]["server"]

            if uuid == "":
                output_cfg[instance_type]['uuid'] = input_cfg["dvid"]["uuid"]

        # Resolve in case a branch was given instead of a specific uuid
        server = output_cfg[instance_type]['server']
        uuid = output_cfg[instance_type]['uuid']
        uuid = resolve_ref(server, uuid)

        if is_locked(server, uuid):
            info = fetch_server_info(server)
            if "Mode" in info and info["Mode"] == "allow writes on committed nodes":
                logger.warn(f"Output is a locked node ({uuid}), but server is in full-write mode. Proceeding.")
            elif os.environ.get("DVID_ADMIN_TOKEN", ""):
                logger.warn(f"Output is a locked node ({uuid}), but you defined DVID_ADMIN_TOKEN. Proceeding.")
            else:
                raise RuntimeError(f"Can't write to node {uuid} because it is locked.")

        if instance_type == 'tarsupervoxels' and not self.input_is_labelmap_supervoxels():
            msg = ("You shouldn't write to a tarsupervoxels instance unless "
                   "you're reading supervoxels from a labelmap input.\n"
                   "Use a labelmap input source, and set supervoxels: true")
            raise RuntimeError(msg)

        existing_instances = fetch_repo_instances(server, uuid)
        if instance in existing_instances:
            # Instance exists -- nothing to do.
            return

        if not output_cfg[instance_type]['create-if-necessary']:
            msg = (f"Output instance '{instance}' does not exist, "
                   "and your config did not specify create-if-necessary")
            raise RuntimeError(msg)

        assert instance_type in ('tarsupervoxels', 'keyvalue')

        ## keyvalue output
        if instance_type == "keyvalue":
            create_instance(server, uuid, instance, "keyvalue", tags=["type=meshes"])
            return

        ## tarsupervoxels output
        sync_instance = output_cfg["tarsupervoxels"]["sync-to"]

        if not sync_instance:
            # Auto-fill a default 'sync-to' instance using the input segmentation, if possible.
            info = fetch_instance_info(*[input_cfg["dvid"][k] for k in ("server", "uuid", "tarsupervoxels-instance")])
            syncs = info['Base']['Syncs']
            if syncs:
                sync_instance = syncs[0]

        if not sync_instance:
            msg = ("Can't create a tarsupervoxels instance unless "
                   "you specify a 'sync-to' labelmap instance name.")
            raise RuntimeError(msg)

        if sync_instance not in existing_instances:
            msg = ("Can't sync to labelmap instance '{sync_instance}': "
                   "it doesn't exist on the output server.")
            raise RuntimeError(msg)

        create_tarsupervoxel_instance(server, uuid, instance, sync_instance, options["format"])
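
# For reference, a hedged sketch of the two output shapes this method accepts
# (field names follow the code above; all values are placeholders):
#
#   output:
#     directory: /path/to/meshes        # directory output, or...
#
#   output:
#     tarsupervoxels:                   # ...DVID output (or 'keyvalue')
#       server: ""                      # blank -> auto-filled from the input
#       uuid: ""
#       instance: sv-meshes
#       sync-to: segmentation
#       create-if-necessary: true


# ---- Example 5 ----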
    def __init__(self, volume_config, resource_manager_client=None):
        validate(volume_config, DvidGenericVolumeSchema, inject_defaults=True)

        assert 'apply-labelmap' not in volume_config["dvid"].keys(), \
            ("The apply-labelmap section should be in the 'adapters' section, (parallel to 'dvid' and 'geometry'), "
             "not nested within the 'dvid' section!")

        ##
        ## server, uuid
        ##
        ## Note:
        ##   self.uuid will be resolved, but volume_config["dvid"]["uuid"]
        ##   will not be overwritten. It will remain unresolved.
        ##
        self._server = volume_config["dvid"]["server"]
        self._uuid = resolve_ref(volume_config["dvid"]["server"],
                                 volume_config["dvid"]["uuid"])

        self._throttle = volume_config["dvid"]["accept-throttling"]

        ##
        ## instance, dtype, etc.
        ##

        config_block_width = volume_config["geometry"]["block-width"]

        assert ('segmentation-name' in volume_config["dvid"]) ^ ('grayscale-name' in volume_config["dvid"]), \
            "Config error: Specify either segmentation-name or grayscale-name (not both)"

        if "segmentation-name" in volume_config["dvid"]:
            self._instance_name = volume_config["dvid"]["segmentation-name"]
            self._dtype = np.uint64
        elif "grayscale-name" in volume_config["dvid"]:
            self._instance_name = volume_config["dvid"]["grayscale-name"]
            self._dtype = np.uint8

        self._dtype_nbytes = np.dtype(self._dtype).itemsize

        try:
            instance_info = fetch_instance_info(self._server, self._uuid,
                                                self._instance_name)
        except HTTPError as ex:
            if ex.response.status_code != 400:
                raise

            if not volume_config["dvid"]["create-if-necessary"]:
                existing_instances = fetch_repo_instances(
                    self._server, self._uuid)
                if self._instance_name not in existing_instances:
                    raise RuntimeError(
                        f"Instance '{self._instance_name}' does not exist in {self._server} / {self._uuid}. "
                        "Add 'create-if-necessary: true' to your config if you want it to be created."
                    )
                raise

            # Instance doesn't exist yet -- we are going to create it.
            if "segmentation-name" in volume_config["dvid"]:
                self._instance_type = 'labelmap'  # get_voxels doesn't really care if it's labelarray or labelmap...
                self._is_labels = True
            else:
                self._instance_type = 'uint8blk'
                self._is_labels = False

            block_width = config_block_width
        else:
            self._instance_type = instance_info["Base"]["TypeName"]
            self._is_labels = self._instance_type in ('labelblk', 'labelarray',
                                                      'labelmap')
            if self._instance_type == "googlevoxels" and instance_info[
                    "Extended"]["Scales"][0]["channelType"] == "UINT64":
                self._is_labels = True

            bs_x, bs_y, bs_z = instance_info["Extended"]["BlockSize"]
            assert (bs_x == bs_y == bs_z), "Expected blocks to be cubes."
            block_width = bs_x

        if "disable-indexing" in volume_config["dvid"]:
            self.disable_indexing = volume_config["dvid"]["disable-indexing"]
        else:
            self.disable_indexing = DvidSegmentationServiceSchema[
                "properties"]["disable-indexing"]["default"]

        if "enable-downres" in volume_config["dvid"]:
            self.enable_downres = volume_config["dvid"]["enable-downres"]
        else:
            self.enable_downres = DvidSegmentationServiceSchema["properties"][
                "enable-downres"]["default"]

        if "gzip-level" in volume_config["dvid"]:
            self.gzip_level = volume_config["dvid"]["gzip-level"]
        else:
            self.gzip_level = DvidSegmentationServiceSchema["properties"][
                "gzip-level"]["default"]

        # Whether or not to read the supervoxels from the labelmap instance instead of agglomerated labels.
        self.supervoxels = ("supervoxels" in volume_config["dvid"]) and volume_config["dvid"]["supervoxels"]

        ##
        ## default block width
        ##
        assert config_block_width in (-1, block_width), \
            f"DVID volume block-width ({config_block_width}) from config does not match server metadata ({block_width})"
        if block_width == -1:
            # No block-width specified; choose default
            block_width = 64

        ##
        ## bounding-box
        ##
        bounding_box_zyx = np.array(
            volume_config["geometry"]["bounding-box"])[:, ::-1]
        try:
            stored_extents = fetch_volume_box(self._server, self.uuid,
                                              self._instance_name)
        except HTTPError:
            assert -1 not in bounding_box_zyx.flat[:], \
                f"Instance '{self._instance_name}' does not yet exist on the server, "\
                "so your volume_config must specify explicit values for bounding-box"
        else:
            if stored_extents is not None and stored_extents.any():
                replace_default_entries(bounding_box_zyx, stored_extents)

        ##
        ## message-block-shape
        ##
        preferred_message_shape_zyx = np.array(
            volume_config["geometry"]["message-block-shape"][::-1])
        replace_default_entries(preferred_message_shape_zyx,
                                [block_width, block_width, 100 * block_width])

        ##
        ## available-scales
        ##
        available_scales = list(volume_config["geometry"]["available-scales"])

        ##
        ## resource_manager_client
        ##
        if resource_manager_client is None:
            # Dummy client
            resource_manager_client = ResourceManagerClient("", 0)

        ##
        ## Special setting to override resource manager for sparse coords
        ##
        try:
            use_resource_manager_for_sparse_coords = volume_config["dvid"]["use-resource-manager-for-sparse-coords"]
        except KeyError:
            # Grayscale doesn't have this setting
            use_resource_manager_for_sparse_coords = False

        ##
        ## Store members
        ##
        self._resource_manager_client = resource_manager_client
        self._block_width = block_width
        self._bounding_box_zyx = bounding_box_zyx
        self._preferred_message_shape_zyx = preferred_message_shape_zyx
        self._available_scales = available_scales
        self._use_resource_manager_for_sparse_coords = use_resource_manager_for_sparse_coords
        self.write_empty_blocks = volume_config["dvid"]["write-empty-blocks"]

        ##
        ## Overwrite config entries that we might have modified
        ##
        volume_config["geometry"]["block-width"] = self._block_width
        volume_config["geometry"][
            "bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
        volume_config["geometry"][
            "message-block-shape"] = self._preferred_message_shape_zyx[::
                                                                       -1].tolist(
                                                                       )

        # TODO: Check the server for available scales and overwrite in the config?
        #volume_config["geometry"]["available-scales"] = [0]

        if volume_config["dvid"]["create-if-necessary"]:
            self._create_instance(volume_config)
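
# A hedged, self-contained illustration of the replace_default_entries()
# convention used above, assuming it fills entries equal to the default
# marker (-1) in place:
def _demo_replace_default_entries():
    import numpy as np
    block_width = 64
    shape = np.array([-1, -1, -1])
    defaults = [block_width, block_width, 100 * block_width]
    mask = (shape == -1)
    shape[mask] = np.asarray(defaults)[mask]   # stand-in for replace_default_entries(shape, defaults)
    assert shape.tolist() == [64, 64, 6400]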

# ---- Example 6 ----

    def execute(self):
        self._init_service()
        options = self.config["roistats"]

        if not options["roi-server"]:
            assert isinstance(self.input_service, DvidVolumeService)
            options["roi-server"] = self.input_service.base_service.server

        if not options["roi-uuid"]:
            assert isinstance(self.input_service, DvidVolumeService)
            options["roi-uuid"] = self.input_service.base_service.uuid

        options["roi-uuid"] = resolve_ref(options["roi-server"],
                                          options["roi-uuid"])

        is_supervoxels = (isinstance(self.input_service, DvidVolumeService)
                          and self.input_service.base_service.supervoxels)
        bodies = load_body_list(options["subset-bodies"], is_supervoxels)
        assert len(bodies) > 0, "Please provide a list of subset-bodies to process"

        scale = options["analysis-scale"]
        bounding_box = self.input_service.bounding_box_zyx
        assert not (bounding_box % 2**5).any(), \
            "Make sure your configured bounding box is divisible by 32px at scale 0."
        brick_shape = self.input_service.preferred_message_shape
        assert not (brick_shape % 2**5).any(), \
            "Make sure your preferred message shape divides into 32px blocks at scale 0"

        with Timer("Fetching ROI volume", logger):
            roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
                options["roi-server"], options["roi-uuid"], options["rois"],
                False, bounding_box // 2**5)

        if len(overlaps) > 0:
            logger.warning(f"Some of your ROIs overlap!  Here's an incomplete list:\n{overlaps}")

        with Timer("Determining brick set", logger):
            # Determine which bricks intersect our ROIs
            roi_brick_shape = self.input_service.preferred_message_shape // 2**5
            roi_brick_boxes = boxes_from_mask((roi_vol_s5 != 0),
                                              roi_box_s5[0],
                                              roi_brick_shape,
                                              clipped=False)
            roi_brick_boxes *= 2**5
            roi_brick_boxes = box_intersection(
                roi_brick_boxes, self.input_service.bounding_box_zyx)

            # Non-intersecting boxes have negative shape -- drop them.
            valid = ((roi_brick_boxes[:, 1, :] - roi_brick_boxes[:, 0, :]) > 0).all(axis=1)
            roi_brick_boxes = roi_brick_boxes[valid]
            roi_brick_coords_df = pd.DataFrame(roi_brick_boxes[:, 0, :], columns=[*'zyx'])
            try:
                body_brick_coords_df = self.input_service.sparse_brick_coords_for_labels(
                    bodies)
            except NotImplementedError:
                # Use all bricks in the ROIs, and use the special label -1 to
                # indicate that all bodies in the list might be found there.
                # (See below.)
                brick_coords_df = roi_brick_coords_df
                brick_coords_df['label'] = -1
            else:
                brick_coords_df = body_brick_coords_df.merge(
                    roi_brick_coords_df, 'inner', on=[*'zyx'])

            assert brick_coords_df.columns.tolist() == [*'zyx', 'label']
            np.save('brick-coords.npy',
                    brick_coords_df.to_records(index=False))

        with Timer("Preparing bricks", logger):
            boxes_and_roi_bricks = []
            for coord, brick_labels in brick_coords_df.groupby([*'zyx'])['label'].agg(tuple).items():
                if brick_labels == (-1, ):
                    # No sparse body brick locations were found above.
                    # Search for all bodies in all bricks.
                    brick_labels = bodies

                box = np.array((coord, coord))
                box[1] += brick_shape
                box = box_intersection(box, bounding_box)

                roi_brick_box = ((box // 2**5) - roi_box_s5[0])
                roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
                boxes_and_roi_bricks.append((box, roi_brick_s5, brick_labels))

        scaled_shape = brick_shape // (2**scale)
        logger.info(
            f"Prepared {len(boxes_and_roi_bricks)} bricks of scale-0 shape "
            f"{(*brick_shape[::-1],)} ({(*scaled_shape[::-1],)} at scale-{scale})"
        )

        all_stats = []
        batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
        logger.info(f"Processing {len(batches)} batches")
        for i, batch_boxes_and_bricks in enumerate(batches):
            with Timer(f"Batch {i:02d}", logger):
                batch_stats = self._execute_batch(scale,
                                                  batch_boxes_and_bricks)
                all_stats.append(batch_stats)

        all_stats = pd.concat(all_stats, ignore_index=True)
        all_stats = all_stats.groupby(['body', 'roi_id'],
                                      as_index=False)['voxels'].sum()

        roi_names = pd.Series(["<none>", *options["rois"]], name='roi')
        roi_names.index.name = 'roi_id'
        all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
        all_stats = all_stats.sort_values(['body', 'roi_id'])

        if scale > 0:
            all_stats.rename(columns={'voxels': f'voxels_s{scale}'},
                             inplace=True)

        with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
            np.save('roi-stats.npy', all_stats.to_records(index=False))
            all_stats.to_csv('roi-stats.csv', index=False, header=True)
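
# Reading the exported stats back (grounded in the two writes above; the .npy
# file holds a structured record array):
#
#   stats = pd.DataFrame(np.load('roi-stats.npy'))
#   # or, equivalently:
#   stats = pd.read_csv('roi-stats.csv')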

# ---- Example 7 ----

def neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, body_id, scale=0, min_size=0, search_radius=50, processes=1):
    from functools import partial
    import numpy as np
    import pandas as pd

    from neuclease.util import compute_parallel
    from neuclease.dvid import fetch_sparsevol_coarse, resolve_ref, fetch_labels, fetch_labelmap_voxels

    seg_src[1] = resolve_ref(*seg_src[:2])
    mito_cc_src[1] = resolve_ref(*mito_cc_src[:2])
    mito_class_src[1] = resolve_ref(*mito_class_src[:2])

    # Fetch the body's coarse sparsevol (scale-6 block coords), convert to
    # scale-0 voxel coords, then rescale to the analysis scale.
    block_coords = (2**6) * fetch_sparsevol_coarse(*seg_src, body_id)
    bc_df = pd.DataFrame(block_coords, columns=[*'zyx'])
    bc_df[[*'zyx']] //= 2**scale
    block_coords = bc_df.drop_duplicates().values

    #
    # Blockwise stats
    #
    block_fn = partial(_process_block, seg_src, mito_cc_src, mito_class_src, body_id, scale)
    block_tables = compute_parallel(block_fn, block_coords, processes=processes)
    block_tables = [*filter(lambda t: t is not None, block_tables)]
    #
    # Combine stats
    #
    full_table = pd.concat(block_tables, sort=True).fillna(0)
    class_cols = [*filter(lambda c: c.startswith('class'), full_table.columns)]
    full_table = full_table.astype({c: np.int32 for c in class_cols})

    # Weight each block centroid by the block's voxel count before taking the mean
    full_table[[*'zyx']] *= full_table[['total_size']].values
    stats_df = full_table.groupby('mito_id').sum()
    stats_df[[*'zyx']] /= stats_df[['total_size']].values

    # Drop tiny mitos
    stats_df = stats_df.query("total_size >= @min_size").copy()

    # Assume all centroids are 'exact' by default (overwritten below if necessary)
    stats_df['centroid_type'] = 'exact'

    # Include a column for 'body' even though it's the same on every row,
    # just as a convenience for concatenating these results with the results
    # from other bodies if desired.
    stats_df['body'] = body_id

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    stats_df = stats_df[['body', *'xyz', 'total_size', *class_cols, 'centroid_type']]

    #
    # Check for centroids that fall outside of the mito,
    # and adjust them if necessary.
    #
    centroid_mitos = fetch_labels(*mito_cc_src, stats_df[[*'zyx']].values, scale=scale)
    mismatches = stats_df.index[(stats_df.index != centroid_mitos)]

    if len(mismatches) == 0:
        return stats_df

    logger.warning("Some mitochondria centroids do not lie within the mitochondria itself. "
                   "Searching for pseudo-centroids.")

    # Construct a field of distances from the central voxel
    sr = search_radius
    cz, cy, cx = np.ogrid[-sr:sr+1, -sr:sr+1, -sr:sr+1]
    distances = np.sqrt(cz**2 + cy**2 + cx**2)

    pseudo_centroids = []
    error_mito_ids = []
    for row in stats_df.loc[mismatches].itertuples():
        mito_id = row.Index
        centroid = np.array((row.z, row.y, row.x))
        box = (centroid - sr, 1 + centroid + sr)
        mito_mask = (mito_id == fetch_labelmap_voxels(*mito_cc_src, box, scale))

        if not mito_mask.any():
            pseudo_centroids.append((row.z, row.y, row.x))
            error_mito_ids.append(mito_id)
            continue

        # Find minimum distance
        masked_distances = np.where(mito_mask, distances, np.inf)
        new_centroid = np.unravel_index(np.argmin(masked_distances), masked_distances.shape)
        new_centroid = np.array(new_centroid) + centroid - sr
        pseudo_centroids.append(new_centroid)

    stats_df.loc[mismatches, ['z', 'y', 'x']] = np.array(pseudo_centroids, dtype=np.int32)
    stats_df.loc[mismatches, 'centroid_type'] = 'adjusted'
    stats_df.loc[error_mito_ids, 'centroid_type'] = 'error'

    if error_mito_ids:
        logger.warning("Some mitochondria pseudo-centroids could not be found.")

    stats_df = stats_df.astype({a: np.int32 for a in 'zyx'})
    return stats_df
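
# Hypothetical invocation (placeholder servers/uuids/IDs; note that the three
# sources must be mutable lists, since the resolved uuid is written back
# in-place above):
#
#   seg_src = ['http://emdata:8900', 'master', 'segmentation']
#   mito_cc_src = ['http://emdata:8900', 'master', 'mito-cc']
#   mito_class_src = ['http://emdata:8900', 'master', 'mito-classes']
#   stats = neuron_mito_stats(seg_src, mito_cc_src, mito_class_src,
#                             body_id=123456, scale=1, min_size=1000,
#                             processes=8)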