Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--neuprint-server', '-n', default='neuprint.janelia.org')
    parser.add_argument('--dataset', '-d')
    parser.add_argument('--init', '-i', choices=['groundtruth', 'random'])
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--min-weight', '-w', default=10, type=int)
    args = parser.parse_args()

    c = Client(args.neuprint_server, args.dataset)
    export_dir = f"{c.dataset}-w{args.min_weight}-from-{args.init}"
    os.makedirs(export_dir, exist_ok=True)

    # Fetch connectome (and export)
    with Timer("Fetching/exporting connectome", logger):
        criteria = NC(status='Traced', cropped=False, client=c)
        neuron_df, roi_conn_df = fetch_adjacencies(criteria, criteria, min_total_weight=args.min_weight, export_dir=export_dir, properties=['type', 'instance'], client=c)
        conn_df = roi_conn_df.groupby(['bodyId_pre', 'bodyId_post'], as_index=False)['weight'].sum()
    
    strong_connections_df, g, nbs, partition_df = infer_hierarchy(neuron_df,
                                                                  conn_df,
                                                                  args.min_weight,
                                                                  args.init,
                                                                  args.verbose,
                                                                  args.debug)

    with Timer("Exporting inference results", logger):
        pickle.dump(g,                     open(f'{export_dir}/graph.pkl', 'wb'))
        pickle.dump(nbs,                   open(f'{export_dir}/nested-block-state.pkl', 'wb'))
        pickle.dump(partition_df,          open(f'{export_dir}/partition_df.pkl', 'wb'))
        pickle.dump(strong_connections_df, open(f'{export_dir}/strong_connections_df.pkl', 'wb'))

    logger.info("DONE")
Example #2
    def execute(self):
        self._init_services()
        self._validate_config()

        options = self.config["copygrayscale"]
        input_bb_zyx = self.input_service.bounding_box_zyx

        min_scale = options["min-pyramid-scale"]
        max_scale = options["max-pyramid-scale"]

        starting_slice = options["starting-slice"]

        axis_name = options["slab-axis"]
        axis = 'zyx'.index(axis_name)
        slab_boxes = list(slabs_from_box(input_bb_zyx, options["slab-depth"], slab_cutting_axis=axis))
        logger.info(f"Processing volume in {len(slab_boxes)} slabs")

        for slab_index, slab_fullres_box_zyx in enumerate(slab_boxes):
            if slab_fullres_box_zyx[0, axis] < starting_slice:
                logger.info(f"Slab {slab_index}: SKIPPING. {slab_fullres_box_zyx[:,::-1].tolist()}")
                continue

            with Timer() as slab_timer:
                logger.info(f"Slab {slab_index}: STARTING. {slab_fullres_box_zyx[:,::-1].tolist()}")
                slab_wall = None
                for scale in range(0, max_scale+1):
                    with Timer() as scale_timer:
                        slab_wall = self._process_slab(scale, slab_fullres_box_zyx, slab_index, len(slab_boxes), slab_wall, min_scale)
                    logger.info(f"Slab {slab_index}: Scale {scale} took {scale_timer.timedelta}")

            logger.info(f"Slab {slab_index}: DONE. ({slab_timer.timedelta})", extra={'status': f"DONE with slab {slab_index}"})

        logger.info(f"DONE exporting {len(slab_boxes)} slabs")
Example #3
def main():
    # Hard-coded parameters
    prod = 'emdata4:8900'
    master = (prod, find_master(prod))
    master_seg = (*master, 'segmentation')

    # I accidentally corrupted the labelindex of bodies in this region
    patch_box = 20480 + np.array([[0, 0, 0], [1024, 1024, 1024]])

    with Timer("Fetching supervoxels", logger):
        boxes = boxes_from_grid(patch_box, Grid((64, 64, 6400)), clipped=True)
        sv_sets = compute_parallel(partial(_fetch_svs, master_seg),
                                   boxes,
                                   processes=32,
                                   ordered=False,
                                   leave_progress=True)
        svs = set(chain(*sv_sets)) - set([0])

    bodies = set(fetch_mapping(*master_seg, svs))

    with Timer(f"Repairing {len(bodies)} labelindexes", logger):
        compute_parallel(partial(_repair_index, master_seg),
                         bodies,
                         processes=32,
                         ordered=False,
                         leave_progress=True)

    print("DONE.")
Example #4
def sv_to_mesh(server,
               uuid,
               instance,
               sv,
               smoothing_iterations=0,
               simplification_fraction=1.0,
               max_box_volume=DEFAULT_MAX_BOUNDING_BOX_VOL):
    """
    Download a mask for the given supervoxel and generate a mesh from it.
    If the mask bounding box would be large at scale 0, a smaller scale will be used.
    The returned mesh will always use scale-0 coordinates, though.
    """
    with Timer("Fetching supervoxel mask", logger):
        mask, scale, scaled_box = fetch_supervoxel_mask(
            server, uuid, instance, sv, max_box_volume)
        fullres_box = scaled_box * (2**scale)

    with Timer(f"Generating mesh from scale {scale}", logger):
        mesh = Mesh.from_binary_vol(mask, fullres_box)

    with Timer(f"Smoothing ({smoothing_iterations})", logger):
        mesh.laplacian_smooth(smoothing_iterations)

    # If we chose a scale other than 0, automatically reduce the
    # amount of decimation, since there will already be fewer vertices at lower resolution.
    simplification_fraction *= (2**scale)**2
    simplification_fraction = min(1.0, simplification_fraction)

    with Timer(f"Decimating ({simplification_fraction})", logger):
        mesh.simplify(simplification_fraction, in_memory=True)

    logger.info(
        f"Mesh has {len(mesh.vertices_zyx)} vertices and {len(mesh.faces)} faces"
    )
    return mesh
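A hedged usage sketch for sv_to_mesh above; the server, uuid, instance, and supervoxel ID are placeholders, and only attributes the function itself logs are accessed:

if __name__ == "__main__":
    # All values below are hypothetical placeholders for illustration.
    mesh = sv_to_mesh('http://example-dvid:8000',  # assumed DVID server
                      'abc123',                    # assumed uuid
                      'segmentation',              # assumed labelmap instance
                      1234567890,                  # assumed supervoxel id
                      smoothing_iterations=3,
                      simplification_fraction=0.2)
    print(f"{len(mesh.vertices_zyx)} vertices, {len(mesh.faces)} faces")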
Example #5
def load_stats_h5_to_records(h5_path):
    """
    Read a block segment statistics HDF5 file.
    The file should contain a dataset named 'stats', whose dtype
    is the same as STATS_DTYPE, but possibly without a 'body_id' column. 

    If the dataset contains no 'body_id' column,
    one is prepended to the result (as a copy of the segment_id column).
    
    Returns:
        (block_sv_stats, presorted_by, agglomeration_path)
        
        where:
            block_sv_stats:
                ndarray with dtype=STATS_DTYPE
            
            presorted_by:
                One of the following:
                    - None: stats are not sorted
                    - 'segment_id': stats were sorted by the 'segment_id' column
                    - 'body_id': stats were sorted by the 'body_id' column

            agglomeration_path:
                A path pointing to the agglomeration mapping which was used to produce the 'body_id' column when the file was saved.
    """
    with h5py.File(h5_path, 'r') as f:
        dset = f['stats']
        with Timer(f"Allocating RAM for {len(dset)} block stats rows", logger):
            block_sv_stats = np.empty(dset.shape, dtype=STATS_DTYPE)

        if 'body_id' in dset.dtype.names:
            dest_view = block_sv_stats
        else:
            full_view = block_sv_stats.view([('body_col', [STATS_DTYPE[0]]),
                                             ('other_cols', STATS_DTYPE[1:])])
            dest_view = full_view['other_cols']

        with Timer(f"Loading block stats into RAM", logger):
            h5_batch_size = 1_000_000
            for batch_start in range(0, len(dset), h5_batch_size):
                batch_stop = min(batch_start + h5_batch_size, len(dset))
                dest_view[batch_start:batch_stop] = dset[
                    batch_start:batch_stop]

        if 'body_id' not in dset.dtype.names:
            block_sv_stats['body_id'] = block_sv_stats['segment_id']

        try:
            presorted_by = dset.attrs['presorted-by']
            assert presorted_by in ('segment_id', 'body_id')
        except KeyError:
            presorted_by = None

        agglomeration_path = None
        if presorted_by == 'body_id':
            agglomeration_path = dset.attrs['agglomeration-mapping-path']

    return block_sv_stats, presorted_by, agglomeration_path
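The docstring above pins down the expected HDF5 layout. A minimal sketch of producing a compatible file with h5py; the column layout is an assumption mirroring the docstring of sort_block_stats (body_id, segment_id, z, y, x, count), not the real STATS_DTYPE definition:

import numpy as np
import h5py

# Assumed column layout; the real STATS_DTYPE lives in the stats module.
ASSUMED_STATS_DTYPE = [('body_id', np.uint64), ('segment_id', np.uint64),
                       ('z', np.int32), ('y', np.int32), ('x', np.int32),
                       ('count', np.uint32)]

stats = np.zeros(2, dtype=ASSUMED_STATS_DTYPE)
stats['segment_id'] = [10, 20]
stats['body_id'] = stats['segment_id']      # identity mapping (no agglomeration)
stats['count'] = [100, 250]

with h5py.File('/tmp/example-block-stats.h5', 'w') as f:
    f.create_dataset('stats', data=stats, chunks=True)
    f['stats'].attrs['presorted-by'] = 'segment_id'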
Example #6
    def init_brickwall(self, volume_service, subset_labels, roi):
        sbm = None

        if roi["name"]:
            base_service = volume_service.base_service

            if not roi["server"] or not roi["uuid"]:
                assert isinstance(base_service, DvidVolumeService), \
                    "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

            roi["server"] = (roi["server"] or volume_service.server)
            roi["uuid"] = (roi["uuid"] or volume_service.uuid)

            if roi["scale"] is not None:
                scale = roi["scale"]
            elif isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"
            else:
                scale = 0

            brick_shape = volume_service.preferred_message_shape
            assert not (brick_shape % 2**(5-scale)).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, 2**(5-scale))
            seg_box_s0 = seg_box * 2**scale
            seg_box_s5 = seg_box // 2**(5-scale)

            with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
                roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"], format='mask', mask_box=seg_box_s5)

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))

        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError("Could not find sparse masks for any of the subset-labels")
            except NotImplementedError:
                sbm = None

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes

            # Apply halo WHILE downloading the data.
            # TODO: Allow the user to configure whether or not the halo should
            #       be fetched from the outset, or added after the blocks are loaded.
            halo = self.config["connectedcomponents"]["halo"]
            brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client, target_partition_size_voxels, halo, sbm, compression='lz4_2x')

        return brickwall
Example #7
def infer_hierarchy(neuron_df, connection_df, min_weight=10, init='groundtruth', verbose=True, special_debug=False):
    ##
    ## TODO: If filtering connections for min_weight drops some neurons entirely, they should be removed from neuron_df
    ##
    lsf_slots = os.environ.get('LSB_DJOB_NUMPROC', default=0)
    if lsf_slots:
        os.environ['OMP_NUM_THREADS'] = lsf_slots
        logger.info(f"Using {lsf_slots} CPUs for OpenMP")

    assert init in ('groundtruth', 'random')
    neuron_df = load_table(neuron_df)
    connection_df = load_table(connection_df)

    assert {*neuron_df.columns} >= {'bodyId', 'instance', 'type'}
    assert {*connection_df.columns} >= {'bodyId_pre', 'bodyId_post', 'weight'}

    if special_debug:
        # Choose a very small subset of the data
        neuron_df = neuron_df.iloc[::100]
        bodies = neuron_df['bodyId']
        connection_df = connection_df.query('bodyId_pre in @bodies and bodyId_post in @bodies')

    if init == "groundtruth":
        with Timer("Computing initial hierarchy from groundtruth", logger):
            assign_morpho_indexes(neuron_df)
            num_morpho_groups = neuron_df.morpho_index.max()+1
            init_bs = [neuron_df['morpho_index'].values, np.zeros(num_morpho_groups, dtype=int)]
    else:
        init_bs = None

    # If this is a per-ROI table, sum up the ROIs.
    if 'roi' in connection_df:
        connection_df = connection_df.groupby(['bodyId_pre', 'bodyId_post'], as_index=False)['weight'].sum()

    strong_connections_df = connection_df.query('weight >= @min_weight')
    strong_bodies = pd.unique(strong_connections_df[['bodyId_pre', 'bodyId_post']].values.reshape(-1))
    weights = strong_connections_df.set_index(['bodyId_pre', 'bodyId_post'])['weight']
    
    logger.info(f"Strong connectome (cutoff={min_weight}) has {len(strong_bodies)} bodies and {len(weights)} edges")
    
    vertexes = np.arange(len(strong_bodies), dtype=np.uint32)
    vertex_mapper = LabelMapper(strong_bodies.astype(np.uint64), vertexes)
    vertex_reverse_mapper = LabelMapper(vertexes, strong_bodies.astype(np.uint64))

    g = construct_graph(weights, vertexes, vertex_mapper)
    
    with Timer("Running inference"):
        # Computes a NestedBlockState
        nbs = graph_tool.inference.minimize_nested_blockmodel_dl(g,
                                                                 bs=init_bs,
                                                                 mcmc_args=dict(parallel=True), # see graph-tool docs and mailing list for caveats 
                                                                 deg_corr=True,
                                                                 verbose=verbose)

    partition_df = construct_partition_table(nbs, neuron_df, vertexes, vertex_reverse_mapper)
    return strong_connections_df, g, nbs, partition_df
Example #8
    def execute(self):
        input_wall = self.init_brickwall()

        block_shape = 3*[self.config["input"]["geometry"]["block-width"]]
        def compute_stats(brick):
            return block_stats_for_volume(block_shape, brick.volume, brick.physical_box)

        with Timer("Computing block stats", logger):            
            all_stats = input_wall.bricks.map(compute_stats).compute()

        with Timer("Concatenating block stats", logger):            
            stats_df = pd.concat(all_stats, ignore_index=True)
        
        with Timer("Writing block stats", logger):            
            self.write_block_stats(stats_df)
Example #9
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--output", "-o", type=str, required=False)
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('assignment_json')
    args = parser.parse_args()

    if args.output is None:
        name, ext = os.path.splitext(args.assignment_json)
        args.output = name + '-adjusted' + ext

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    with Timer(f"Processing {args.assignment_json}", logger):
        with open(args.assignment_json, 'r') as f:
            assignment_data = ujson.load(f)

        new_assignment_data = adjust_focused_points(*instance_info,
                                                    assignment_data)

        with open(args.output, 'w') as f:
            ujson.dump(new_assignment_data, f, indent=2)

    logger.info(f"Wrote to {args.output}")
Example #10
def _fetch_mito_mask(mito_src, body_mask, mask_box, body_block_corners, scale,
                     mito_min_size, mito_scale_offset):
    assert scale - mito_scale_offset >= 0, \
        "FIXME: need to upsample the mito seg if using scale 0.  Not implemented yet."

    with Timer("Fetching mito mask", logger):
        if _have_flyemflows and isinstance(mito_src, VolumeService):
            mito_seg = mito_src.get_subvolume(mask_box, scale)
        else:
            assert len(mito_src) == 3 and all(
                isinstance(s, str) for s in mito_src)
            mito_seg = fetch_labelmap_specificblocks(*mito_src,
                                                     body_block_corners,
                                                     scale - mito_scale_offset,
                                                     supervoxels=True,
                                                     threads=4)

    # mito classes 1,2,3 are valid;
    # mito mask class 4 means "empty", as does 0.
    mito_mask = np.array([0, 1, 1, 1, 0], np.uint8)[mito_seg]

    body_mito_mask = np.where(body_mask, mito_mask, 0)
    body_mito_mask = vigra.taggedView(body_mito_mask, 'zyx')
    body_mito_cc = labelMultiArrayWithBackground(body_mito_mask)

    # Erase small mitos from body_mito_mask
    mito_sizes = np.bincount(body_mito_cc.reshape(-1))
    mito_sizes[0] = 0
    body_mito_mask = (mito_sizes > mito_min_size)[body_mito_cc]
    return body_mito_mask
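The class lookup table and size filter above are generic numpy idioms. A standalone toy demonstration (unrelated to the project's data):

import numpy as np

# Toy 'mito segmentation' with class labels 0..4 (class 4 means "empty", like 0).
mito_seg = np.array([[0, 1, 4],
                     [2, 3, 0]], dtype=np.uint8)

# LUT indexing: classes 1,2,3 -> 1 (valid mito); classes 0 and 4 -> 0.
mito_mask = np.array([0, 1, 1, 1, 0], np.uint8)[mito_seg]
assert mito_mask.tolist() == [[0, 1, 0], [1, 1, 0]]

# Size filtering via bincount: keep only labels whose size exceeds a cutoff.
cc = np.array([0, 1, 1, 2, 2, 2, 0])        # toy connected-component labels
sizes = np.bincount(cc)
sizes[0] = 0                                # never keep the background
keep_mask = (sizes > 2)[cc]
assert keep_mask.tolist() == [False, False, False, True, True, True, False]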
Example #11
def persist_and_execute(bag,
                        description=None,
                        logger=None,
                        optimize_graph=True):
    """
    Persist and execute the given dask.Bag.
    The persisted Bag is returned.
    """
    assert isinstance(bag, Bag)
    if logger and description:
        logger.info(f"{description}...")

    with Timer() as timer:
        bag = bag.persist(optimize_graph=optimize_graph)
        count = bag.count().compute()  # force eval
        parts = bag.npartitions
        partition_counts = bag.map_partitions(
            lambda part: [sum(1 for _ in part)]).compute()
        histogram = defaultdict(lambda: 0)
        for c in partition_counts:
            histogram[c] += 1
        histogram = dict(histogram)

    if logger and description:
        logger.info(
            f"{description} (N={count}, P={parts}, P_hist={histogram}) took {timer.timedelta}"
        )

    return bag
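A minimal usage sketch for persist_and_execute, assuming the function above is importable alongside its Timer and Bag helpers; the Bag contents are arbitrary:

import logging
import dask.bag as db

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("persist-demo")

bag = db.from_sequence(range(1000), npartitions=8).map(lambda x: x * x)
bag = persist_and_execute(bag, "Squaring integers", logger=log)

# Later operations reuse the persisted partitions instead of recomputing them.
print(bag.sum().compute())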
Example #12
    def execute(self):
        self._init_services()
        self._sanitize_config()
        self._init_stats_file()

        options = self.config["masksegmentation"]
        min_scale = options["min-pyramid-scale"]
        max_scale = options["max-pyramid-scale"]

        resumed_scale = options["resume-at"]["scale"]
        resumed_batch = options["resume-at"]["batch-index"]

        if resumed_scale != -1 or resumed_batch != 0:
            logger.info(
                f"Resuming at scale {resumed_scale} batch {resumed_batch}")

        if resumed_scale == -1:
            starting_scale = max_scale
        else:
            starting_scale = resumed_scale

        mask_s5, mask_box_s5 = self._init_mask()

        # Process in reverse-order, since it's convenient to check the
        # low-res scales while the higher ones are still processing.
        starting_batch = max(0, resumed_batch)
        for scale in range(starting_scale, min_scale - 1, -1):
            if scale != starting_scale:
                starting_batch = 0

            with Timer(f"Scale {scale}: Processing", logger):
                self._execute_scale(scale, starting_batch, mask_s5,
                                    mask_box_s5)
Example #13
    def run(self, kill_cluster=True):
        """
        Run the workflow by calling the subclass's execute() function
        (with some startup/shutdown steps before/after).
        """
        logger.info(f"Working dir: {os.getcwd()}")

        # The execute() function is run within these nested contexts.
        # See contexts.py
        workflow_name = self.config['workflow-name']
        cluster_type = self.config["cluster-type"]
        max_wait = self.config["cluster-max-wait"]

        # If you're trying to debug a C++ Python extension with AddressSanitizer,
        # uncomment this function call.
        # See developer-examples/ASAN_NOTES.txt for details.
        # self._preload_asan_mac()

        with \
        Timer(f"Running {workflow_name} with {self.num_workers} workers", logger), \
        LocalResourceManager(self.config["resource-manager"]), \
        ClusterContext(cluster_type, self.num_workers, True, max_wait, not kill_cluster) as self.cc, \
        environment_context(self.config["environment-variables"], self), \
        WorkerDaemons(self):
            self.execute()
Example #14
def filter_groups_for_min_boi_count(edges_df,
                                    bois,
                                    group_columns=['group_cc'],
                                    min_boi_count=2):
    """
    Group the given dataframe according to the columns listed in `group_columns`,
    and count how many BOIs exist in each group.
    
    Then drop rows from the original dataframe if the group they belong to didn't have enough BOIs.
    """
    with Timer("Filtering out groups with too few BOIs", logger):
        bois = np.fromiter(bois, np.uint64)
        bois.sort()
        assert isinstance(group_columns, (list, tuple))

        boi_counts_df = edges_df[['label_a', 'label_b', *group_columns]].copy()
        boi_counts_df['is_boi_a'] = boi_counts_df.eval('label_a in @bois')
        boi_counts_df['is_boi_b'] = boi_counts_df.eval('label_b in @bois')
        boi_counts_df['boi_count'] = boi_counts_df['is_boi_a'].astype(
            int) + boi_counts_df['is_boi_b'].astype(int)

        group_boi_counts = boi_counts_df.groupby(
            group_columns)['boi_count'].agg('sum')
        group_boi_counts = group_boi_counts[group_boi_counts >= min_boi_count]

        kept_groups_df = group_boi_counts.reset_index()[[*group_columns]]
        logger.info(
            f"Keeping {len(kept_groups_df)} groups ({group_columns}) out of {len(boi_counts_df)}"
        )

        edges_df = edges_df.merge(kept_groups_df, 'inner', on=group_columns)
    return edges_df
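At its core, the filter above counts BOIs per edge, sums them per group, and keeps groups via an inner merge. A toy pandas version of the same pattern (independent of the Timer/logger helpers used above):

import numpy as np
import pandas as pd

edges = pd.DataFrame({'label_a':  [1, 2, 5, 6],
                      'label_b':  [2, 3, 6, 7],
                      'group_cc': [0, 0, 1, 1]})
bois = np.array([1, 3, 7], dtype=np.uint64)

counts = (edges['label_a'].isin(bois).astype(int)
          + edges['label_b'].isin(bois).astype(int))
group_counts = counts.groupby(edges['group_cc']).sum()
kept = group_counts[group_counts >= 2].rename('boi_count').reset_index()[['group_cc']]

# Group 0 contains BOIs 1 and 3 and is kept; group 1 contains only BOI 7 and is dropped.
filtered = edges.merge(kept, 'inner', on='group_cc')
print(filtered)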
Example #15
def append_group_ccs(edges_df, subset_groups, max_distance=None):
    """
    For the given edges_df, assign a group to each edge
    (duplicating edges if they belong to multiple groups),
    and return the cc id as a new column 'group_cc'.
    
    The CC operation is performed on all groups at once,
    using disjoint sets of node IDs for every group.
    Thus, the CC ids for each group do NOT start at 1.
    Rather, the values in group_cc are arbitrary and
    not even consecutive.
    
    max_distance:
        If provided, exclude edges that exceed this distance from
        the CC computation (but include them in the resulting dataframe).
        For such excluded edges, group_cc == -1. 
    """
    with Timer("Computing group_cc", logger):
        edges_df = append_group_col(edges_df, subset_groups)

        # Assign a unique id for every label/group combination,
        # so we can run CC on the whole set at once.
        # Labels that appear more than once (in different groups)
        # will be treated as independent nodes,
        # and there will be no edges between groups.
        #
        # Note: Assigning node IDs this way assumes subset-requirement == 2
        subset_groups = subset_groups[['label', 'group']].copy()
        subset_groups['node_id'] = subset_groups.index.astype(np.uint32)
        
        # Append columns for [node_id_a, node_id_b]
        edges_df = (edges_df.merge( subset_groups, 'left',
                                    left_on=['label_a', 'group'], right_on=['label', 'group'])
                    .drop('label', axis=1))
        edges_df = (edges_df.merge( subset_groups, 'left',
                                    left_on=['label_b', 'group'], right_on=['label', 'group'],
                                    suffixes=['_a', '_b'])
                   .drop('label', axis=1))

        # Drop edges that are too distant to consider for CC
        if max_distance is None:
            thresholded_edges = edges_df[['node_id_a', 'node_id_b']].values
        else:
            thresholded_edges = edges_df.query('distance <= @max_distance')[['node_id_a', 'node_id_b']].values

        # Compute CC on the entire edge set, yielding a unique id for every CC in each group
        group_cc = 1 + connected_components_nonconsecutive(thresholded_edges, subset_groups['node_id'].values)
        subset_groups['group_cc'] = group_cc.astype(np.int32)
        
        # Append group_cc to every row.
        # All edges we actually used will have the same group_cc for node_id_a/node_id_b,
        # so just use node_id_a as the lookup.
        edges_df = edges_df.merge(subset_groups[['node_id', 'group_cc']], 'left', left_on='node_id_a', right_on='node_id')
        edges_df = edges_df.drop(['node_id_a', 'node_id_b', 'node_id'], axis=1)
    
        # But edges that were NOT used might be part of two different components.
        # group_cc has no valid value for those rows.  Set to -1.
        edges_df['group_cc'] = edges_df['group_cc'].astype(np.int32)
        if max_distance is not None:
            edges_df.loc[edges_df['distance'] > max_distance, 'group_cc'] = np.int32(-1)
        return edges_df, subset_groups
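The CC step above relies on the project's connected_components_nonconsecutive helper over node IDs that are unique per label/group pair. A rough standalone illustration of the same idea, using scipy as a substitute for the real helper:

import numpy as np
from scipy.sparse import coo_matrix
from scipy.sparse.csgraph import connected_components

# Toy edges between node IDs that are already unique per label/group combination.
edges = np.array([[0, 1],
                  [1, 2],
                  [3, 4]])
num_nodes = 5

adjacency = coo_matrix((np.ones(len(edges)), (edges[:, 0], edges[:, 1])),
                       shape=(num_nodes, num_nodes))
_n_components, cc_labels = connected_components(adjacency, directed=False)
print(cc_labels)   # [0 0 0 1 1]: nodes 0-2 share a component, nodes 3-4 share another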
Example #16
    def _execute_scale(self, scale, starting_batch, mask_s5, mask_box_s5):
        options = self.config["masksegmentation"]
        block_width = self.output_service.block_width

        def scale_box(box, scale):
            # Scale down, then round up to the nearest multiple of the block width
            box = np.ceil(box / 2**scale).astype(np.int32)
            return round_box(box, block_width)

        # bounding box of the segmentation at the current scale.
        bounding_box = scale_box(self.input_service.bounding_box_zyx, scale)

        # Don't make bricks that are wider than the bounding box at this scale
        brick_shape = np.minimum(self.input_service.preferred_message_shape,
                                 bounding_box[1])
        assert not (brick_shape % block_width).any()

        brick_boxes = boxes_from_grid(bounding_box, brick_shape, clipped=True)

        with Timer(f"Scale {scale}: Preparing bricks", logger):
            boxes_and_masks = []
            for box in brick_boxes:
                mask_block_box = ((box // 2**(5 - scale)) - mask_box_s5[0])
                mask_block_box = mask_block_box.astype(
                    np.int32)  # necessary when scale is > 5
                mask_block_s5 = extract_subvol(mask_s5, mask_block_box)
                if mask_block_s5.any():
                    boxes_and_masks.append((box, mask_block_s5))

        batches = [*iter_batches(boxes_and_masks, options["batch-size"])]

        if starting_batch == 0:
            logger.info(f"Scale {scale}: Processing {len(batches)} batches")
        else:
            logger.info(
                f"Scale {scale}: Processing {len(batches) - starting_batch} "
                f"remaining batches from {len(batches)} original batches")

            assert starting_batch < len(batches), \
                f"Can't start at batch {starting_batch}; there are only {len(batches)} in total."
            batches = batches[starting_batch:]

        for batch_index, batch_boxes_and_masks in enumerate(
                batches, start=starting_batch):
            with Timer(f"Scale {scale}: Batch {batch_index:02d}", logger):
                self._execute_batch(scale, batch_index, batch_boxes_and_masks)
Example #17
def sort_block_stats(block_sv_stats,
                     segment_to_body_df=None,
                     output_path=None,
                     agglo_mapping_path=None):
    """
    Sorts the block stats by body ID, IN-PLACE.
    If segment_to_body_df is given, the body_id column is overwritten with mapped IDs.
    If agglo_mapping_path and output_path are given, save the sorted result to an hdf5 file.

    block_sv_stats:
        numpy structured array of blockwise supervoxel counts, with dtype:
        ['body_id', 'segment_id', 'z', 'y', 'x', 'count'].

    segment_to_body_df:
        If loading an agglomeration, must be a 2-column DataFrame, mapping supervoxel-to-body.
        If loading unagglomerated supervoxels, set to None (identity mapping is used).

    output_path:
        If given, sorted result will be saved as hdf5 to this file,
        with the internal dataset name 'stats'

    agglo_mapping_path:
        A path indicating where the segment_to_body_df was loaded from.
        It's saved to the hdf5 attributes for provenance tracking.
    
    """
    with Timer("Assigning body IDs", logger):
        _overwrite_body_id_column(block_sv_stats, segment_to_body_df)

    with Timer(f"Sorting {len(block_sv_stats)} block stats", logger):
        block_sv_stats.sort(
            order=['body_id', 'z', 'y', 'x', 'segment_id', 'count'])

    if output_path:
        with Timer(f"Saving sorted stats to {output_path}"), h5py.File(
                output_path, 'w') as f:
            f.create_dataset('stats', data=block_sv_stats, chunks=True)
            if segment_to_body_df is None:
                f['stats'].attrs['presorted-by'] = 'segment_id'
            else:
                assert agglo_mapping_path
                f['stats'].attrs['presorted-by'] = 'body_id'
                f['stats'].attrs[
                    'agglomeration-mapping-path'] = agglo_mapping_path
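The in-place sort above is numpy's structured-array sort with a multi-column order. A tiny self-contained demonstration (the dtype is an assumption mirroring the docstring's column list):

import numpy as np

assumed_dtype = [('body_id', np.uint64), ('segment_id', np.uint64),
                 ('z', np.int32), ('y', np.int32), ('x', np.int32),
                 ('count', np.uint32)]

stats = np.array([(2, 20, 0,  0, 64, 7),
                  (1, 10, 0, 64,  0, 3),
                  (1, 11, 0,  0,  0, 5)], dtype=assumed_dtype)

# Sort in place: primarily by body_id, then by block coordinate.
stats.sort(order=['body_id', 'z', 'y', 'x', 'segment_id', 'count'])
print(stats['body_id'].tolist())   # [1, 1, 2]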
Example #18
    def init_boxes(self, volume_service, subset_labels, roi):
        sbm = None
        if roi:
            base_service = volume_service.base_service
            assert isinstance(base_service, DvidVolumeService), \
                "Can't specify an ROI unless you're using a dvid input"

            assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
                "The 'roi' option doesn't support adapters other than 'rescale-level'"
            scale = 0
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"

            server, uuid, _seg_instance = base_service.instance_triple

            brick_shape = volume_service.preferred_message_shape
            assert not (brick_shape % 2**(5-scale)).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, brick_shape)
            seg_box_s0 = seg_box * 2**scale
            seg_box_s5 = seg_box // 2**(5 - scale)

            with Timer(
                    f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})",
                    logger):
                roi_mask_s5, _ = fetch_roi(server,
                                           uuid,
                                           roi,
                                           format='mask',
                                           mask_box=seg_box_s5)

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask.create_from_highres_mask(
                roi_mask_s5, 2**(5 - scale), seg_box, brick_shape)
        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels(
                    [*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError(
                        "Could not find sparse masks for any of the subset-labels"
                    )
            except NotImplementedError:
                sbm = None

        if sbm is None:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    volume_service.preferred_message_shape,
                                    clipped=True)
            return np.array([*boxes])
        else:
            # Note: brick_shape is only defined in the ROI branch above,
            # so use the volume service's preferred shape here.
            return sbm.sparse_boxes(volume_service.preferred_message_shape)
Example #19
def main():
    configure_default_logging()
    initialize_excepthook()
    logger.setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--last-mutid', '-i', required=False, type=int)
    parser.add_argument(
        '--num-threads',
        '-t',
        default=0,
        type=int,
        help=
        'How many threads to use when ingesting label indexes (does not currently apply to mappings)'
    )
    parser.add_argument(
        '--num-processes',
        '-p',
        default=0,
        type=int,
        help=
        'How many processes to use when ingesting label indexes (does not currently apply to mappings)'
    )
    parser.add_argument(
        '--batch-size',
        '-b',
        default=100_000,
        type=int,
        help=
        'Data is grouped in batches to the server. This is the batch size, as measured in ROWS of data to be processed for each batch.'
    )
    parser.add_argument('server')
    parser.add_argument('src_uuid')
    parser.add_argument('dest_uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument(
        'supervoxel_block_stats_h5',
        nargs='?',  # not required if only ingesting mapping
        help=
        f'An HDF5 file with a single dataset "stats", with dtype: {STATS_DTYPE[1:]} (Note: No column for body_id)'
    )
    args = parser.parse_args()

    with Timer() as timer:
        src_info = (args.server, args.src_uuid, args.labelmap_instance)
        dest_info = (args.server, args.dest_uuid, args.labelmap_instance)
        erase_from_labelindexes(src_info,
                                dest_info,
                                args.supervoxel_block_stats_h5,
                                args.batch_size,
                                threads=args.num_threads,
                                processes=args.num_processes,
                                last_mutid=args.last_mutid)
    logger.info(f"DONE. Total time: {timer.timedelta}")
Example #20
def write_stats(stats_df, output_path, logger=None):
    if not output_path.endswith('.pkl.xz'):
        output_path += '.pkl.xz'

    if logger is None:
        logger = logging.getLogger(__name__)

    stats_bytes = stats_df.memory_usage().sum()
    stats_gb = stats_bytes / 1e9
    with Timer(f"Saving segment statistics", logger):
        logger.info(f"Writing stats ({stats_gb:.3f} GB) to {output_path}")
        stats_df.to_pickle(output_path)
Example #21
    def init_boxes(self, volume_service, roi):
        if not roi["name"]:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    volume_service.preferred_message_shape,
                                    clipped=True)
            return np.array([*boxes])

        base_service = volume_service.base_service

        if not roi["server"] or not roi["uuid"]:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        if roi["scale"] is not None:
            scale = roi["scale"]
        elif isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"
        else:
            scale = 0

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, 2**(5 - scale))
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // 2**(5 - scale)

        with Timer(
                f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})",
                logger):
            roi_mask_s5, _ = fetch_roi(roi["server"],
                                       roi["uuid"],
                                       roi["name"],
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5 - scale))
        boxes = sbm.sparse_boxes(brick_shape)

        # Clip boxes to the true (not rounded) bounding box
        boxes[:, 0] = np.maximum(boxes[:, 0],
                                 volume_service.bounding_box_zyx[0])
        boxes[:, 1] = np.minimum(boxes[:, 1],
                                 volume_service.bounding_box_zyx[1])
        return boxes
Example #22
    def __init__(self, graph, nbs, partition_df, neuron_df,
                 strong_connections_df):
        with Timer("Loading data", logger):
            graph = load_pickle(graph)
            nbs = load_pickle(nbs)
            partition_df = load_table(partition_df)
            neuron_df = load_table(neuron_df)
            strong_connections_df = load_table(strong_connections_df)

        num_levels = len(nbs.get_bs()) + 1
        assert {*partition_df.columns} > {*range(num_levels)}
        assert num_levels not in partition_df, \
            "partition_df does not match NestedBlockState levels"

        self.graph = graph
        self.nbs = nbs
        self.num_levels = num_levels
        self.partition_df = partition_df
        self.neuron_df = neuron_df
        self.strong_connections_df = strong_connections_df

        with Timer("Initialzing browser", logger):
            self._initialize()
Example #23
    def init_boxes(self, volume_service, roi):
        if not roi:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    volume_service.preferred_message_shape,
                                    clipped=True)
            return np.array([*boxes])

        base_service = volume_service.base_service
        assert isinstance(base_service, DvidVolumeService), \
            "Can't specify an ROI unless you're using a dvid input"

        assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
            "The 'roi' option doesn't support adapters other than 'rescale-level'"
        scale = 0
        if isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"

        server, uuid, _seg_instance = base_service.instance_triple

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, 2**(5 - scale))
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // 2**(5 - scale)

        with Timer(
                f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})",
                logger):
            roi_mask_s5, _ = fetch_roi(server,
                                       uuid,
                                       roi,
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5 - scale))
        boxes = sbm.sparse_boxes(brick_shape)

        # Clip boxes to the true (not rounded) bounding box
        boxes[:, 0] = np.maximum(boxes[:, 0],
                                 volume_service.bounding_box_zyx[0])
        boxes[:, 1] = np.minimum(boxes[:, 1],
                                 volume_service.bounding_box_zyx[1])
        return boxes
Example #24
    def execute(self):
        self._init_services()
        self._init_masks()
        self._log_neuroglancer_links()
        self._sanitize_config()

        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        self.target_partition_size_voxels = 2 * GB // np.uint64().nbytes

        # (See note in _init_services() regarding output bounding boxes)
        input_bb_zyx = self.input_service.bounding_box_zyx
        output_bb_zyx = self.output_service.bounding_box_zyx
        self.translation_offset_zyx = output_bb_zyx[0] - input_bb_zyx[0]
        if self.translation_offset_zyx.any():
            logger.info(
                f"Translation offset is {self.translation_offset_zyx[:, ::-1].tolist()}"
            )

        pyramid_depth = self.config["copysegmentation"]["pyramid-depth"]
        slab_depth = self.config["copysegmentation"]["slab-depth"]

        # Process data in Z-slabs
        output_slab_boxes = list(slabs_from_box(output_bb_zyx, slab_depth))
        max_depth = max(
            map(lambda box: box[1][0] - box[0][0], output_slab_boxes))
        logger.info(
            f"Processing data in {len(output_slab_boxes)} slabs (max depth={max_depth}) for {pyramid_depth} pyramid levels"
        )

        if self.config["copysegmentation"]["compute-block-statistics"]:
            self._init_stats_file()

        # Read data and accumulate statistics, one slab at a time.
        for slab_index, output_slab_box in enumerate(output_slab_boxes):
            with Timer() as timer:
                self._process_slab(slab_index, output_slab_box)
            logger.info(
                f"Slab {slab_index}: Total processing time: {timer.timedelta}")

            delay_minutes = self.config["copysegmentation"][
                "delay-minutes-between-slabs"]
            if delay_minutes > 0 and slab_index != len(output_slab_boxes) - 1:
                logger.info(
                    f"Delaying {delay_minutes} minutes before continuing to the next slab..."
                )
                time.sleep(delay_minutes * 60)

        logger.info(f"DONE copying/downsampling all slabs")
Example #25
    def run(self, kill_cluster=True):
        """
        Run the workflow by calling the subclass's execute() function
        (with some startup/shutdown steps before/after).
        """
        logger.info(f"Working dir: {os.getcwd()}")

        # The execute() function is run within these nested contexts.
        # See contexts.py
        workflow_name = self.config['workflow-name']
        with Timer(f"Running {workflow_name} with {self.num_workers} workers", logger), \
             LocalResourceManager(self.config["resource-manager"]), \
             WorkflowClusterContext(self, True, not kill_cluster), \
             environment_context(self.config["environment-variables"], self), \
             WorkerDaemons(self):
            self.execute()
Example #26
    def init_boxes(self, volume_service, roi, chunk_shape_s0):
        """
        Return a set of bounding boxes to tile the given ROI.
        Scale 0 of the volume service should correspond to full-res data,
        which is 32x higher-res than ROI resolution.
        """
        if not roi["name"]:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    chunk_shape_s0,
                                    clipped=True)
            return np.array([*boxes])

        base_service = volume_service.base_service

        if not roi["server"] or not roi["uuid"]:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        assert not (chunk_shape_s0 % 2**5).any(), \
            "If using an ROI, select a chunk shape that is divisible by 32"

        seg_box_s0 = volume_service.bounding_box_zyx
        seg_box_s0 = round_box(seg_box_s0, 2**5)
        seg_box_s5 = seg_box_s0 // 2**5

        with Timer(
                f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})",
                logger):
            roi_mask_s5, _ = fetch_roi(roi["server"],
                                       roi["uuid"],
                                       roi["name"],
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box_s0, 2**5)
        boxes = sbm.sparse_boxes(chunk_shape_s0)

        # Clip boxes to the true (not rounded) bounding box
        boxes[:, 0] = np.maximum(boxes[:, 0],
                                 volume_service.bounding_box_zyx[0])
        boxes[:, 1] = np.minimum(boxes[:, 1],
                                 volume_service.bounding_box_zyx[1])
        return boxes
Example #27
    def init_brickwall(self):
        input_config = self.config["input"]
        mask_input_config = self.config["mask-input"]
        mgr_config = self.config["resource-manager"]
        options = self.config["sparseblockstats"]
        
        resource_mgr_client = ResourceManagerClient( mgr_config["server"], mgr_config["port"] )
        input_service = VolumeService.create_from_config( input_config, resource_mgr_client )
        mask_service = VolumeService.create_from_config( mask_input_config, resource_mgr_client )
        
        assert (input_service.preferred_message_shape == mask_service.preferred_message_shape).all(), \
            "This workflow assumes that the input and the mask-input use the same brick grid."

        assert not (input_service.preferred_message_shape % input_service.block_width).any(), \
            "input brick grid spacing must be a multiple of the input's block-width"
        assert not (mask_service.preferred_message_shape % mask_service.block_width).any(), \
            "mask brick grid spacing must be a multiple of the mask's block-width"

        is_supervoxels = False
        if isinstance(mask_service.base_service, DvidVolumeService):
            is_supervoxels = mask_service.base_service.supervoxels

        # Load body list and eliminate duplicates
        subset_labels = load_body_list(options["mask-labels"], is_supervoxels)
        subset_labels = set(subset_labels)

        if not subset_labels:
            raise RuntimeError("You didn't specify any mask subset labels. "
                               "If you want to compute block stats for an entire segmentation volume, use the CopySegmentation workflow.")

        sbm = mask_service.sparse_block_mask_for_labels(subset_labels)
        if ((sbm.box[1] - sbm.box[0]) == 0).any():
            raise RuntimeError("Could not find sparse masks for any of the mask-labels")

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes
            brickwall = BrickWall.from_volume_service(input_service, 0, None, self.client, target_partition_size_voxels, 0, sbm)

            # Pad if necessary to ensure that all fetched bricks are block-aligned
            block_shape = 3*(input_service.block_width,)
            brickwall = brickwall.fill_missing(input_service.get_subvolume, Grid(block_shape))

        return brickwall
Example #28
def compute_fragment_edges(edges_df, bois, processes=0):
    """
    For each edge group, search for paths that can connect the BOIs in the group.
    Each group is a "fragment", a.k.a. "task".
    Return a new edge DataFrame, where each edge is associated with a group and
    a fragment within that group, indicated by group_cc and cc_task, respectively.
    
    Args:
        edges_df:
            An edge table as described in extract_assignment_fragments(), above,
            with the additional requirement that the table is in "normalized" form,
            i.e. label_a < label_b.

        bois:
            List of BOIs
    """
    fragments = extract_fragments(edges_df, bois, processes)

    with Timer("Extracting edges for each fragment from full table", logger):
        edges_df = edges_df.query('group_cc in @fragments.keys()')

        cc_col = []
        task_col = []
        frag_cols = []
        for group_cc, group_fragments in fragments.items():
            for task_index, frag in enumerate(group_fragments):
                cc_col.extend([group_cc] * (len(frag) - 1))
                task_col.extend([task_index] * (len(frag) - 1))
                frag_edges = list(zip(frag[:-1], frag[1:]))
                frag_cols.extend(frag_edges)

        frag_cols = np.array(frag_cols, dtype=np.uint64)
        frag_cols.sort(axis=1)

        fragment_edges_df = pd.DataFrame(frag_cols,
                                         columns=['label_a', 'label_b'])
        fragment_edges_df['group_cc'] = cc_col
        fragment_edges_df['cc_task'] = task_col

        fragment_edges_df = fragment_edges_df.merge(
            edges_df, 'left', ['group_cc', 'label_a', 'label_b'])
        return fragment_edges_df
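In isolation, the inner loop above turns each fragment (a path of body IDs) into consecutive, normalized edge pairs. A toy snippet showing just that transformation:

import numpy as np

frag = [105, 42, 17, 250]                    # a path of body IDs
frag_edges = list(zip(frag[:-1], frag[1:]))  # consecutive pairs along the path
print(frag_edges)                            # [(105, 42), (42, 17), (17, 250)]

# Normalize so label_a < label_b in every row, matching the edge table convention.
edge_array = np.array(frag_edges, dtype=np.uint64)
edge_array.sort(axis=1)
print(edge_array.tolist())                   # [[42, 105], [17, 42], [17, 250]]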
Example #29
        def process_and_save(body):
            tbars = _fetch_synapses(body)
            valid_mitos = _fetch_mito_ids(body)

            # TODO:
            #   Does the stdout_redirected() mechanism work correctly in the context of multiprocessing?
            #   If not, I should probably just use a custom logging handler instead.
            with open(f"body-logs/{body}.log",
                      "w") as f, stdout_redirected(f), Timer() as timer:
                processed_tbars = []
                if len(tbars) == 0:
                    logging.getLogger(__name__).warning(
                        f"Body {body}: No synapses found")

                if len(valid_mitos) == 0:
                    logging.getLogger(__name__).warning(
                        f"Body {body}: Failed to fetch mito supervoxels")
                    processed_tbars = initialize_results(body, tbars)

                if len(valid_mitos) and len(tbars):
                    processed_tbars = measure_tbar_mito_distances(
                        body_svc,
                        mito_svc,
                        body,
                        tbars=tbars,
                        valid_mitos=valid_mitos)

            if len(processed_tbars) > 0:
                processed_tbars.to_csv(f'{output_dir}/{body}.csv',
                                       header=True,
                                       index=False)
                with open(f'{output_dir}/{body}.pkl', 'wb') as f:
                    pickle.dump(processed_tbars, f)

            if len(tbars) == 0:
                return (body, 0, 'no-synapses', timer.seconds)

            if len(valid_mitos) == 0:
                return (body, len(processed_tbars), 'no-mitos', timer.seconds)

            return (body, len(tbars), 'success', timer.seconds)
Example #30
def _fetch_body_mito_seg(mito_src, body_mask, mask_box, scale, valid_mito_mapper, logger):
    """
    Return the mito segmentation for only those mitos which
    overlap with the given body mask (not elsewhere).

    Args:
        mito_src:
            VolumeService to obtain mito segmentation
        body_mask:
            Volume with labels 1+2 as described in _fetch_body_mask()
        valid_mito_mapper:
            LabelMapper that keeps only valid mitos when its apply_with_default() method is called.
    """
    with Timer("Fetching mito segmentation", logger):
        assert _have_flyemflows and isinstance(mito_src, VolumeService)
        mito_seg = mito_src.get_subvolume(mask_box, scale)

    if valid_mito_mapper:
        return valid_mito_mapper.apply_with_default(mito_seg)

    core_body_mask = (body_mask == 2)
    body_mito_seg = np.where(core_body_mask, mito_seg, 0)

    # Due to downsampling discrepancies between the mito seg and neuron seg,
    # mito from neighboring neurons may slightly overlap this neuron.
    # Keep only mitos which have more of their voxels in the body mask than not.
    #
    # FIXME:
    #   This heuristic fails at the volume edge, where we might see just
    #   part of the mito.
    #   Need to overwrite small mitos on the volume edge with FACE_MARKER
    #   to indicate that they can't be trusted, and if such a mito is
    #   the "winning" mito, then we need to try a different search config.
    body_mito_sizes = pd.Series(body_mito_seg.ravel()).value_counts()
    del body_mito_seg
    mito_sizes = pd.Series(mito_seg.ravel()).value_counts()
    mito_sizes, body_mito_sizes = mito_sizes.align(body_mito_sizes, fill_value=0)
    core_mitos = {*mito_sizes[(body_mito_sizes > mito_sizes / 2)].index} - {0}
    core_mito_seg = apply_mask_for_labels(mito_seg, core_mitos, inplace=True)
    return core_mito_seg
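The "majority overlap" test above hinges on value_counts plus align. A toy illustration with synthetic arrays (independent of the real segmentation data):

import numpy as np
import pandas as pd

# Toy data: mito 7 lies mostly inside the body mask, mito 9 mostly outside.
mito_seg      = np.array([7, 7, 7, 9, 9, 9, 0, 0])
body_mito_seg = np.array([7, 7, 0, 9, 0, 0, 0, 0])

body_mito_sizes = pd.Series(body_mito_seg).value_counts()
mito_sizes = pd.Series(mito_seg).value_counts()
mito_sizes, body_mito_sizes = mito_sizes.align(body_mito_sizes, fill_value=0)

core_mitos = {*mito_sizes[body_mito_sizes > mito_sizes / 2].index} - {0}
print(core_mitos)   # {7}: only mito 7 has a majority of its voxels inside the body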