def sample_labels(point_source, stats_df, check_scale, threads, processes):
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import DvidVolumeService

    if isinstance(point_source, DvidVolumeService):
        return fetch_labels_batched(*point_source.instance_triple,
                                    stats_df[[*'zyx']] // (2**check_scale),
                                    supervoxels=point_source.supervoxels,
                                    scale=check_scale,
                                    batch_size=1000,
                                    threads=threads,
                                    processes=processes)

    import multiprocessing as mp
    import dask
    from dask.diagnostics import ProgressBar

    if threads:
        pool = mp.pool.ThreadPool(threads)
    else:
        pool = mp.pool.Pool(processes)

    dask.config.set(scheduler='processes')
    with pool, dask.config.set(pool=pool), ProgressBar():
        centroids = stats_df[[*'zyx']] // (2**check_scale)
        labels = point_source.sample_labels(centroids, scale=check_scale)

    return labels
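
# A minimal usage sketch for sample_labels() above, assuming you already have a
# flyemflows volume config (e.g. loaded from a workflow YAML) in a dict named
# 'volume_config'.  The coordinates in stats_df below are hypothetical.
def example_sample_labels(volume_config):
    import pandas as pd
    from flyemflows.volumes import VolumeService

    point_source = VolumeService.create_from_config(volume_config)

    # stats_df must have integer 'z', 'y', 'x' columns (full-resolution coordinates).
    stats_df = pd.DataFrame({'z': [1024, 1056], 'y': [2048, 2080], 'x': [4096, 4128]})

    # Sample at scale 1; sample_labels() divides the coordinates by 2**check_scale.
    labels = sample_labels(point_source, stats_df, check_scale=1, threads=0, processes=8)
    return labels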
def test_fetch_labels_batched(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')

    coords = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 0, 4], [0, 0, 4]]

    labels = fetch_labels_batched(*instance_info, coords, supervoxels=False, batch_size=2, threads=2)
    assert labels.dtype == np.uint64
    assert (labels == 1).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info, coords, supervoxels=True, batch_size=2, threads=2)
    assert labels.dtype == np.uint64
    assert (labels == [1, 1, 1, 2, 2, 2]).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info, coords, supervoxels=False, batch_size=2, processes=2)
    assert labels.dtype == np.uint64
    assert (labels == 1).all()  # See init_labelmap_nodes() in conftest.py

    labels = fetch_labels_batched(*instance_info, coords, supervoxels=True, batch_size=2, processes=2)
    assert labels.dtype == np.uint64
    assert (labels == [1, 1, 1, 2, 2, 2]).all()  # See init_labelmap_nodes() in conftest.py
def main():
    RESULTS_PKL_PATH = sys.argv[1]
    if len(sys.argv) == 3:
        PROCESSES = int(sys.argv[2])
    else:
        PROCESSES = 4

    # Calculate the difference in resolution between the stored mito segmentation
    # and the neuron segmentation.  If they differ, it must be by a power of 2.
    mito_res = fetch_info(*MITO_SEG)["Extended"]["VoxelSize"][0]
    assert mito_res % NEIGHBORHOOD_RES == 0
    assert np.log2(mito_res / NEIGHBORHOOD_RES) == int(np.log2(mito_res / NEIGHBORHOOD_RES)), \
        "This script assumes that the mito resolution and neighborhood resolution differ by a power of 2."
    mito_res_scale_diff = int(np.log2(mito_res // NEIGHBORHOOD_RES))

    with open(RESULTS_PKL_PATH, 'rb') as f:
        mc_df = pickle.load(f)

    new_names = {col: col.replace(' ', '_') for col in mc_df.columns}
    new_names['result'] = 'proofreader_count'
    mc_df = mc_df.rename(columns=new_names)

    print("Evaluating mito count results")
    results = compute_parallel(partial(_task_results, mito_res_scale_diff),
                               iter_batches(mc_df.drop_duplicates('neighborhood_id'), 1),
                               total=len(mc_df),
                               processes=PROCESSES,
                               leave_progress=True,
                               ordered=False)

    cols = ['neighborhood_id', 'neighborhood_origin', 'proofreader_count',
            'mito_id_count', 'mito_ids', 'mito_sizes', 'num_ccs',
            'mito_cc_ids', 'mito_cc_sizes', 'ng_link']
    df = pd.DataFrame(results, columns=cols)

    # Add columns for cell type (from neuprint)
    print("Fetching neuron cell types")
    origins_df = pd.DataFrame(df['neighborhood_origin'].tolist(), columns=[*'xyz'])
    df['body'] = fetch_labels_batched(*NEURON_SEG, origins_df[[*'zyx']].values, processes=8)
    neurons_df, _ = fetch_neurons(df['body'].unique())
    neurons_df = neurons_df.rename(columns={'bodyId': 'body',
                                            'type': 'body_type',
                                            'instance': 'body_instance'})
    df = df.merge(neurons_df[['body', 'body_type', 'body_instance']], 'left', on='body')
    df['body_type'].fillna("", inplace=True)
    df['body_instance'].fillna("", inplace=True)

    # Append roi column
    print("Determining ROIs")
    determine_point_rois(*NEURON_SEG[:2], NEUPRINT_CLIENT.primary_rois, origins_df)
    df['roi'] = origins_df['roi']

    # Results only
    path = 'mito-seg-counts.pkl'
    print(f"Writing {path}")
    with open(path, 'wb') as f:
        pickle.dump(df, f)

    path = 'mito-seg-counts.tab-delimited.csv'
    print(f"Writing {path}")
    df.to_csv(path, sep='\t', header=True, index=False)

    # Full results (with task info columns)
    df = df.merge(mc_df.drop(columns=['neighborhood_origin', 'proofreader_count']),
                  'left', on='neighborhood_id')

    path = 'full-results-with-mito-seg-counts.pkl'
    print(f"Writing {path}")
    with open(path, 'wb') as f:
        pickle.dump(df, f)

    path = 'full-results-with-mito-seg-counts.tab-delimited.csv'
    print(f"Writing {path}")
    df.to_csv(path, sep='\t', header=True, index=False)

    print("DONE")
def fetch_vnc_statuses(server, uuid):
    """
    Fetch all body statuses from the body annotation key-value,
    but also include all soma bodies (regardless of status)
    and bodies that were annotated in the neck.

    Also fetch the number of synapses for each.

    Example:

        .. code-block:: ipython

            In [72]: ann = fetch_vnc_statuses('emdata5.janelia.org:8400', '73f39bea795f48e18feafb033b544ae5')
            [2021-05-06 11:32:14,581] INFO Pre-sorting 15143 coordinates by block index...
            [2021-05-06 11:32:14,587] INFO Pre-sorting 15143 coordinates by block index took 0:00:00.006287
            [2021-05-06 11:32:14,588] INFO Fetching labels from DVID...
            [2021-05-06 11:32:26,116] INFO Fetching labels from DVID took 0:00:11.527091
            [2021-05-06 11:32:31,480] WARNING There are 129 duplicate bodies in the results, due to multi-soma and/or multi-cervical bodies!

            In [73]: ann.columns
            Out[73]:
            Index(['status', 'user', 'naming user', 'instance', 'status user', 'comment',
                   'json', 'soma_x', 'soma_y', 'soma_z', 'has_soma', 'neck_x', 'neck_y',
                   'neck_z', 'is_cervical'],
                  dtype='object')

            In [75]: ann.query('has_soma or is_cervical')[['status', 'status user', 'has_soma', 'is_cervical',
                ...:                                       'soma_x', 'soma_y', 'soma_z', 'neck_x', 'neck_y', 'neck_z']]
            Out[75]:
                                   status status user  has_soma  is_cervical  soma_x  soma_y  soma_z  neck_x  neck_y  neck_z
            body
            10000   Prelim Roughly traced                 False         True       0       0       0   24481   36044   67070
            100000            Soma Anchor                  True        False   22959   20811    7254       0       0       0
            100002            Soma Anchor                  True        False   28216   35641   61443       0       0       0
            10002   Prelim Roughly traced                 False         True       0       0       0   23217   35252   67070
            100031  Prelim Roughly traced      smithc     False         True       0       0       0   23263   38354   67070
            ...                       ...         ...       ...          ...     ...     ...     ...     ...     ...     ...
            97550         Cervical Anchor                 False         True       0       0       0   23341   38451   67070
            99837   Prelim Roughly traced       cookm     False         True       0       0       0   22665   38397   67070
            0                                              True        False   14912   31188   19347       0       0       0
            0                                              True        False   23125   16634   12777       0       0       0
            167778                                         True        False   22324    6881   16642       0       0       0

            [17188 rows x 10 columns]
    """
    soma_df = fetch_sphere_annotations(server, uuid, 'soma-bookmarks', 'segmentation')
    soma_df = soma_df[['body', *'xyz']]
    soma_df['has_soma'] = True
    soma_df = soma_df.rename(columns={k: f'soma_{k}' for k in 'xyz'})

    neck_df = fetch_all_elements(server, uuid, 'neck-points', format='pandas')
    neck_df = neck_df[[*'xyz']]
    neck_df['body'] = fetch_labels_batched(server, uuid, 'segmentation',
                                           neck_df[[*'zyx']].values,
                                           processes=4, batch_size=1000)
    neck_df = neck_df.rename(columns={k: f'neck_{k}' for k in 'xyz'})
    neck_df['is_cervical'] = True

    ann_df = fetch_body_annotations(server, uuid, 'segmentation_annotations')
    ann_df = ann_df.reset_index()

    ann_df = ann_df.merge(soma_df, 'outer', on='body')
    ann_df = ann_df.merge(neck_df, 'outer', on='body')

    ann_df['has_soma'].fillna(False, inplace=True)
    ann_df['is_cervical'].fillna(False, inplace=True)

    for c in ann_df.columns:
        if c[-2:] in ('_x', '_y', '_z'):
            ann_df[c] = ann_df[c].fillna(0).astype(int)

    for c in ('status', 'user', 'naming user', 'instance', 'status user', 'comment'):
        ann_df[c].fillna("", inplace=True)

    dupes = ann_df['body'].duplicated().sum()
    if dupes:
        logger.warn(f"There are {dupes} duplicate bodies in the results, "
                    "due to multi-soma and/or multi-cervical bodies!")

    del ann_df['body ID']
    ann_df = ann_df.set_index('body')

    bodies = ann_df.index.drop_duplicates().values
    ann_df['tbars'] = fetch_counts(server, uuid, 'synapses_labelsz', bodies, 'PreSyn', format='pandas')
    ann_df['psds'] = fetch_counts(server, uuid, 'synapses_labelsz', bodies, 'PostSyn', format='pandas')
    ann_df['synapses'] = ann_df.eval('tbars + psds')
    return ann_df
def correct_centroids(config, stats_df, check_scale=0, verify=False, threads=0, processes=8):
    import numpy as np
    import pandas as pd

    from neuclease.util import tqdm_proxy, compute_parallel, Timer
    from neuclease.dvid import fetch_labels_batched
    from flyemflows.volumes import VolumeService, DvidVolumeService

    with Timer("Pre-sorting points by block", logger):
        stats_df['bz'] = stats_df['by'] = stats_df['bx'] = np.int32(0)
        stats_df[['bz', 'by', 'bx']] = stats_df[[*'zyx']] // 64
        stats_df.sort_values(['bz', 'by', 'bx'], inplace=True)
        stats_df.drop(columns=['bz', 'by', 'bx'], inplace=True)

    sparsevol_source = VolumeService.create_from_config(config['mito-sparsevol-source'])

    if config['mito-point-source'] is None:
        point_source = sparsevol_source
    else:
        point_source = VolumeService.create_from_config(config['mito-point-source'])

    if isinstance(point_source, DvidVolumeService):
        stats_df['centroid_label'] = fetch_labels_batched(*point_source.instance_triple,
                                                          stats_df[[*'zyx']] // (2**check_scale),
                                                          supervoxels=point_source.supervoxels,
                                                          scale=check_scale,
                                                          batch_size=1000,
                                                          threads=threads,
                                                          processes=processes)
    else:
        import multiprocessing as mp
        import dask
        from dask.diagnostics import ProgressBar

        if threads:
            pool = mp.pool.ThreadPool(threads)
        else:
            pool = mp.pool.Pool(processes)

        dask.config.set(scheduler='processes')
        with pool, dask.config.set(pool=pool), ProgressBar():
            centroids = stats_df[[*'zyx']] // (2**check_scale)
            stats_df['centroid_label'] = point_source.sample_labels(centroids, scale=check_scale)

    mismatched_mitos = stats_df.query('centroid_label != mito_id').index

    logger.info(f"Correcting {len(mismatched_mitos)} mismatched mito centroids")
    _find_mito = partial(find_mito, *sparsevol_source.instance_triple)
    mitos_and_coords = compute_parallel(_find_mito, mismatched_mitos, ordered=False,
                                        threads=threads, processes=processes)

    corrected_df = pd.DataFrame(mitos_and_coords, columns=['mito_id', *'zyx']).set_index('mito_id')
    stats_df.loc[corrected_df.index, [*'zyx']] = corrected_df[[*'zyx']]
    stats_df.loc[corrected_df.index, 'centroid_type'] = 'adjusted'

    # Sanity check: they should all be correct now!
    if verify:
        new_centroids = stats_df.loc[mismatched_mitos, [*'zyx']].values
        new_labels = fetch_labels_batched(*sparsevol_source.instance_triple, new_centroids,
                                          supervoxels=True, threads=threads, processes=processes)
        if (new_labels != mismatched_mitos).any():
            logger.error("Some mitos remained mismatched!")

    return stats_df
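
# A hedged usage sketch for correct_centroids() above.  The config keys shown
# ('mito-sparsevol-source', 'mito-point-source') are the ones the function reads;
# the actual volume-source dicts and the stats table are placeholders you would
# load from your own workflow config and mito statistics file.
def example_correct_centroids(mito_sparsevol_config, mito_point_config, stats_df):
    config = {
        'mito-sparsevol-source': mito_sparsevol_config,  # flyemflows volume config (dict)
        'mito-point-source': mito_point_config,          # may be None to reuse the sparsevol source
    }
    # stats_df is assumed to be indexed by mito_id, with 'z', 'y', 'x' centroid columns.
    return correct_centroids(config, stats_df, check_scale=0, verify=True, threads=0, processes=8)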
def update_localized_edges(server, uuid, seg_instance, edges_df, processes=16):
    """
    Use the coordinates in the edge table to update the label_a/label_b columns
    (by fetching the labels from dvid at the given UUID).
    Then, since the labels MAY have changed, re-compute the central-most edges
    (for "direct" adjacencies) and closest-approaching edges (for nearby "adjacencies").

    This takes a few minutes.
    """
    ref_seg = (server, uuid, seg_instance)

    # Update to latest node
    with Timer(f"Updating body labels for uuid {uuid[:4]}", logger):
        edges_df['label_a'] = fetch_labels_batched(*ref_seg, edges_df[['za', 'ya', 'xa']].values, processes=processes)
        edges_df['label_b'] = fetch_labels_batched(*ref_seg, edges_df[['zb', 'yb', 'xb']].values, processes=processes)

    swap_df_cols(edges_df, None, edges_df.eval('label_a > label_b'), ('a', 'b'))

    # Discard already-merged edges
    edges_df = edges_df.query('label_a != label_b')

    # Now that we've relabeled some points, there may be duplicate edges in the table.
    # De-duplicate them by choosing the best ones.
    # (This takes a while.)
    with Timer("Re-selecting central-most direct edges", logger):
        direct_edges_df = edges_df.loc[edges_df['distance'] == 1.0].copy()

        # If we really want to choose the *best* edge, we should do a proper centroid calculation.
        # But that takes a long time, and there aren't likely to be all that many cases where it makes a difference.
        #direct_edges_df = select_central_edges(direct_edges_df)

        # Instead, just drop duplicates in arbitrary order.
        direct_edges_df.drop_duplicates(['group', 'label_a', 'label_b'], inplace=True)

    with Timer("Re-selecting closest-approaching nearby edges", logger):
        # This doesn't take as long, partly because there are
        # usually fewer nearby edges than direct edges.
        nearby_edges_df = edges_df.loc[edges_df['distance'] >= 1.0]
        nearby_edges_df = select_closest_edges(nearby_edges_df)

    # FIXME: Should we also update the group_cc?
    #        If any splits have occurred, I guess the group_cc is no longer a single component.
    #        But when we analyze it for 'fragments', the results will be correct.
    # append_group_ccs(...)

    # Combine (direct first)
    edges_df = pd.concat((direct_edges_df, nearby_edges_df))

    # After updating, it's technically possible that a nearby
    # edge now has the same labels as a direct edge.
    # Drop duplicates so we keep only the direct edge.
    edges_df = edges_df.drop_duplicates(['group', 'label_a', 'label_b'], keep='first')

    return edges_df
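
# A minimal sketch of how update_localized_edges() might be called, assuming an
# edge table saved by the FindAdjacencies workflow as a structured .npy file.
# The server, uuid, and file path below are placeholders.
import numpy as np
import pandas as pd

def example_update_localized_edges():
    # Hypothetical path to a FindAdjacencies output file.
    edges_df = pd.DataFrame(np.load('find-adjacencies-output.npy'))
    edges_df = update_localized_edges('emdata4:8900', 'abc123', 'segmentation',
                                      edges_df, processes=16)
    return edges_df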
def extract_assignment_fragments(server, uuid, syn_instance,
                                 edge_table,
                                 boi_rois=None,
                                 min_tbars_in_roi=2,
                                 min_psds_in_roi=10,
                                 fragment_rois=None,
                                 processes=16,
                                 *,
                                 request_processes=None,
                                 synapse_table=None,
                                 boi_table=None,
                                 seg_instance=None,
                                 update_edges=False):
    """
    Using the edge table emitted from the FindAdjacencies workflow,
    emit a table of "fragments" (sets of bodies) which connect two
    "bodies of interest" (BOIs, described below).

    The emitted fragments can be used to generate focused assignments
    and/or merge review assignments.

    Essentially, we construct an adjacency graph from the edge table,
    and then search for any paths that can connect two BOIs:

        BOI - b - b - b - ... - b - BOI

    The path from one BOI to another is called a "fragment".

    If the path contains only the two BOIs and no other bodies, then the
    two BOIs are directly adjacent, with no intervening bodies:

        BOI - BOI

    In those cases, it is possible to create a "focused proofreading" task
    from the body pair.  In all other cases, you can create a "merge review"
    task for the fragment.

    See the following functions:

        generate_mergereview_assignments_from_df()
        neuclease.focused.assignments.generate_focused_assignments()

    Exactly which bodies are considered "bodies of interest" is determined
    by the presence of T-bars and PSDs within the specified ROIs (boi_rois,
    if provided), thresholded by the given criteria.  If no boi_rois are
    specified, then all T-bars and PSDs in the given bodies are counted.

    Additionally, the final fragment set can be filtered to exclude
    fragments that travel outside of a given list of ROIs.

    See the description of the edge_table parameter for an explanation of
    the FindAdjacencies output.

    Tip:
        To visualize the adjacency graph for a subset of rows in either
        the input edge table or the output tables, see display_graph(), below.

    Args:
        server, uuid, syn_instance:
            DVID synapse (annotation) instance

        edge_table:
            A DataFrame as explained below, or a filepath to a .npy file
            that can be loaded into one.

            The FindAdjacencies workflow finds the sites at which preselected
            bodies are adjacent to one another.  The user provides a list of
            body "groups" which are analyzed independently.  In addition to
            "direct" adjacencies between touching bodies (distance=1.0), the
            workflow can be configured to also search for near-adjacencies,
            in which bodies come close to each other without physically
            touching (distance > 1.0).

            Each adjacency is referred to as an edge, and the results are
            emitted as an "edge table" with the following columns:

                [label_a, label_b, za, ya, xa, zb, yb, xb, distance, group, group_cc]

            with the following definitions:

                label_a, label_b:
                    Body IDs (assuming the FindAdjacencies workflow was
                    executed on a body input source).

                za, ya, xa, zb, yb, xb:
                    Coordinates that fall within the body on each side of the edge.

                distance:
                    The euclidean distance between the two coordinates.
                    For "direct" adjacencies, distance is always 1.0.
                    For "nearby" adjacencies, distance is always > 1.0.

                group:
                    The original body groups the user selected for adjacency analysis.
                    The exact group ID values are arbitrary (not necessarily consecutive),
                    and were provided by the user that ran the FindAdjacencies workflow.
                    Note that one body may exist in more than one group.

                group_cc:
                    An independent subgraph is constructed for each group (from the
                    group's edges).  A connected components analysis is then performed
                    on each subgraph, and a unique ID is assigned to each CC.

                    Although the connected components are computed on each group in
                    isolation, the assigned group_cc values are unique across all of
                    the groups in the table.  The group_cc values are otherwise
                    arbitrary.  (That is, they aren't necessarily consecutive, or
                    related to the other CC IDs in their group.)  For example,
                    group 123 might be found to contain two connected components,
                    labeled group_cc=53412 and group_cc=82344.

        boi_rois:
            Optional.  List of ROI instance names.
            If provided, only T-bars and PSDs that fall within the given list of ROIs
            will be counted when determining which bodies are considered BOIs.
            Otherwise, all synapses in the volume are considered.

        min_tbars_in_roi, min_psds_in_roi:
            The criteria for determining what counts as a BOI.
            As indicated in the argument names, only synapse points WITHIN the ROI(s)
            will be counted towards these requirements.

        fragment_rois:
            Optional.  Any fragments that extend outside of the given list of ROIs
            will be discarded from the result, even though they contained BOIs that
            matched the BOI criteria.

        processes:
            Various steps in this function can be parallelized.
            This specifies how much parallelism to use.

        request_processes:
            By default, requests to DVID are also made in parallel, with parallelism
            set by the 'processes' argument.  But if you would like to reduce (or
            increase) the processes used when fetching from dvid (e.g. to reduce
            burden on the dvid server), specify a separate parallelism level via
            request_processes.

        synapse_table:
            Optional.  If you already fetched the synapses from DVID
            (via fetch_synapses_in_batches() or fetch_roi_synapses()),
            you can provide it here (or a file path to a stored .npy file),
            in which case this function will not need to fetch the synapses from DVID.
            (Not needed at all if you're providing your own boi_table.)

        boi_table:
            Optional.  Normally this function computes the boi_table directly from
            the synapse points, but if you already have it handy, you can pass it in
            here.  It will still be filtered according to min_tbars_in_roi and
            min_psds_in_roi, so the BOIs used will be accurate as long as the table
            contains all of the BOIs you might be interested in, or more.

        seg_instance:
            By default, the BOIs in this table will be extracted from the segmentation
            instance that is associated with the given synapse annotation instance.
            But if you would like to use a different segmentation instance, provide it here.

        update_edges:
            If True, re-fetch the body label under each coordinate in the table,
            and re-select the "best" (most central) edge for body pairs with multiple
            edges.  This takes a while to run.  It's only necessary if your edge table
            is likely to be out-of-date with respect to the given UUID.

    Returns:
        (focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table), where:

        focused_fragments_df:
            A DataFrame consisting of rows suitable for "focused proofreading",
            i.e. every row (edge) is a single-edge fragment.

        mr_fragments_df:
            A DataFrame consisting of edges that belong to fragments with more than
            one edge, meaning they are not suitable for "focused proofreading" and
            are instead suitable for "merge review".
            The fragment IDs are the (group_cc, cc_task) columns.  Edges with the
            same fragment ID should be grouped together into the same merge review task.

        mr_endpoint_df:
            A DataFrame containing only the 'endpoint' bodies of the MR fragments,
            one pair per row.  The columns in which the bodies are found (a vs b)
            will not be the same as they appear in mr_fragments_df, but the group_cc
            and cc_task columns will correspond to the appropriate rows in the full
            DataFrame.

            This 'endpoint' dataframe does not contain enough information to create
            merge review tasks (it lacks information about the intermediate bodies
            that connect the two endpoints), but it is more convenient to analyze
            when computing certain statistics to describe the types of merge review
            tasks that were found.

        boi_table:
            A DataFrame containing the BOIs (based on the criteria given above) that
            were used to select fragments, indexed by body, with columns
            ['PreSyn', 'PostSyn'].
            (See ``neuclease.dvid.annotation.determine_bodies_of_interest()``.)
            Note that the returned fragments do not necessarily cover every BOI in
            this list.
    """
    if isinstance(boi_rois, str):
        boi_rois = [boi_rois]

    if isinstance(fragment_rois, str):
        fragment_rois = [fragment_rois]

    request_processes = request_processes or processes

    if seg_instance is None:
        syn_info = fetch_instance_info(server, uuid, syn_instance)
        seg_instance = syn_info["Base"]["Syncs"][0]

    ref_seg = (server, uuid, seg_instance)

    # Load edges (if necessary), pre-filter, normalize
    edges_df = load_edges(edge_table)

    if update_edges:
        # Update the table for consistency with the given UUID,
        # and re-post-process it to find the correct "central" and "closest" edges,
        # (in case some groups were merged).
        edges_df = update_localized_edges(*ref_seg, edges_df, request_processes)

    # Technically, you could provide 0 for either of these,
    # but that's probably a mistake on your part.
    # (Unless you specifically appended some 0-synapse bodies to your
    # synapse table, and expect those to be considered BOIs.)
    assert min_tbars_in_roi >= 1 and min_psds_in_roi >= 1

    if boi_table is not None:
        boi_table = boi_table.query('PreSyn >= @min_tbars_in_roi or PostSyn >= @min_psds_in_roi')
    else:
        assert not boi_rois, \
            "You can't specify boi_rois if you're providing your own boi_table"

        # Fetch synapse labels and determine the set of BOIs
        boi_table = determine_bodies_of_interest(server, uuid, syn_instance,
                                                 boi_rois,
                                                 min_tbars_in_roi,
                                                 min_psds_in_roi,
                                                 request_processes,
                                                 synapse_table=synapse_table)

    assert boi_table.index.name == 'body'
    assert set(boi_table.columns) == {'PreSyn', 'PostSyn'}

    bois = set(boi_table.index)

    # We're trying to connect BOIs to each other.
    # Therefore, we're not interested in groups of bodies
    # that don't contain at least 2 BOIs.
    edges_df = filter_groups_for_min_boi_count(edges_df, bois, ['group_cc'], 2)

    # Find the paths ('fragments', a.k.a. 'tasks') that connect BOIs within each group.
    fragment_edges_df = compute_fragment_edges(edges_df, bois, processes)

    if fragment_rois is not None:
        # Drop fragments that extend outside of the specified ROIs.
        fragment_edges_df = filter_fragments_for_roi(server, uuid, fragment_rois, fragment_edges_df)

    # If a group itself contained multiple CCs, it's possible that the BOIs were
    # separated into separate tasks, meaning that each individual task no longer
    # satisfies the 2-BOI requirement.  Refilter.
    fragment_edges_df = filter_groups_for_min_boi_count(fragment_edges_df, bois, ['group_cc', 'cc_task'], 2)

    # Fetch the supervoxel IDs for each edge.
    with Timer("Sampling supervoxel IDs", logger):
        points_a = fragment_edges_df[['za', 'ya', 'xa']].values
        points_b = fragment_edges_df[['zb', 'yb', 'xb']].values
        fragment_edges_df['sv_a'] = fetch_labels_batched(*ref_seg, points_a, True, processes=request_processes)
        fragment_edges_df['sv_b'] = fetch_labels_batched(*ref_seg, points_b, True, processes=request_processes)

    # Divide into 'focused' and 'merge review' fragments,
    # i.e. single-edge fragments and multi-edge fragments
    focused_fragments_df = (fragment_edges_df
                            .groupby(['group_cc', 'cc_task'])
                            .filter(lambda task_df: len(task_df) == 1)  # exactly one edge
                            .copy())

    mr_fragments_df = (fragment_edges_df
                       .groupby(['group_cc', 'cc_task'])
                       .filter(lambda task_df: len(task_df) > 1)  # multiple edges
                       .copy())

    num_focused_fragments = len(focused_fragments_df)
    num_mr_fragments = len(mr_fragments_df.drop_duplicates(['group_cc', 'cc_task']))
    fragment_bodies = pd.unique(fragment_edges_df[['label_a', 'label_b']].values.reshape(-1))
    num_fragment_bois = len(set(fragment_bodies).intersection(set(boi_table.index)))

    logger.info(f"Emitting {num_focused_fragments} focused fragments and "
                f"{num_mr_fragments} merge-review fragments, "
                f"covering {num_fragment_bois} BOIs out of {len(boi_table)}.")

    with Timer("Merging synapse counts onto results", logger):
        focused_fragments_df = focused_fragments_df.merge(boi_table, 'left',
                                                          left_on='label_a', right_index=True)
        focused_fragments_df = focused_fragments_df.merge(boi_table, 'left',
                                                          left_on='label_b', right_index=True,
                                                          suffixes=('_a', '_b'))

        mr_fragments_df = mr_fragments_df.merge(boi_table, 'left',
                                                left_on='label_a', right_index=True)
        mr_fragments_df = mr_fragments_df.merge(boi_table, 'left',
                                                left_on='label_b', right_index=True,
                                                suffixes=('_a', '_b'))

    with Timer("Constructing merge-review 'endpoint' dataframe", logger):
        try:
            mr_endpoint_df = construct_mr_endpoint_df(mr_fragments_df, bois)
        except BaseException as ex:
            logger.error(str(ex))
            logger.error("Failed to construct the merge-review 'endpoint' dataframe. Returning None.")
            mr_endpoint_df = None

    return focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table
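
# A hedged usage sketch for extract_assignment_fragments() above.  The server,
# uuid, instance names, ROI names, and file path are placeholders; only the
# keyword arguments correspond to the parameters documented in the docstring.
def example_extract_assignment_fragments():
    focused_df, mr_df, mr_endpoint_df, boi_table = extract_assignment_fragments(
        'emdata4:8900', 'abc123', 'synapses',
        edge_table='find-adjacencies-edges.npy',   # hypothetical FindAdjacencies output
        boi_rois=['EB', 'FB'],                     # hypothetical ROI names
        min_tbars_in_roi=2,
        min_psds_in_roi=10,
        fragment_rois=['EB', 'FB'],
        processes=16,
        update_edges=False)
    return focused_df, mr_df, mr_endpoint_df, boi_table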
#!/usr/bin/env python3
# Use python 3.6 or greater
#
# python map_csv_to_segmentation.py emdata4:8900 6f2cb segmentation clean-synapses-6f2cb-sub3-roi-added.csv synIDs_synapses-6f2cb-rois-bodyIDs.csv

import os
import sys
import logging

import requests
import numpy as np
import pandas as pd

from neuclease.dvid import fetch_labels_batched

dvid_server = sys.argv[1]
dvid_uuid = sys.argv[2]
segmentation_inst = sys.argv[3]
infile = sys.argv[4]
outfile = sys.argv[5]

master_seg = (dvid_server, dvid_uuid, segmentation_inst)

df = pd.read_csv(infile)
labels = fetch_labels_batched(*master_seg, df[['z', 'y', 'x']].values, threads=32)
df['body'] = labels
df.to_csv(outfile, index=False)
def fetch_roi_synapses(server, uuid, synapses_instance, rois, fetch_labels=False, return_partners=False, processes=16):
    """
    Fetch the coordinates and (optionally) body labels for
    all synapses that fall within the given ROIs.

    Args:
        server:
            DVID server, e.g. 'emdata4:8900'

        uuid:
            DVID uuid, e.g. 'abc9'

        synapses_instance:
            DVID synapses instance name, e.g. 'synapses'

        rois:
            A single DVID ROI instance name or a list of them, e.g. 'EB' or ['EB', 'FB']

        fetch_labels:
            If True, also fetch the supervoxel and body label underneath each synapse,
            returned in columns 'sv' and 'body'.

        return_partners:
            If True, also return the partners table.

        processes:
            How many parallel processes to use when fetching synapses and supervoxel labels.

    Returns:
        pandas DataFrame with columns:
        ``['z', 'y', 'x', 'kind', 'conf']`` and ``['sv', 'body']`` (if ``fetch_labels=True``)

        If return_partners is True, also return the partners table.

    Example:
        df = fetch_roi_synapses('emdata4:8900', '3c281', 'synapses', ['PB(L5)', 'PB(L7)'], True, 8)
    """
    # Late imports to avoid circular imports in dvid/__init__
    from neuclease.dvid import fetch_combined_roi_volume, determine_point_rois, fetch_labels_batched, fetch_mapping, fetch_mappings

    assert rois, "No rois provided, result would be empty. Is that what you meant?"

    if isinstance(rois, str):
        rois = [rois]

    # Determine name of the segmentation instance that's
    # associated with the given synapses instance.
    syn_info = fetch_instance_info(server, uuid, synapses_instance)
    seg_instance = syn_info["Base"]["Syncs"][0]

    logger.info(f"Fetching mask for ROIs: {rois}")

    # Fetch the ROI as a low-res array (scale 5, i.e. 32-px resolution)
    roi_vol_s5, roi_box_s5, overlapping_pairs = fetch_combined_roi_volume(server, uuid, rois)

    if len(overlapping_pairs) > 0:
        logger.warning("Some ROIs overlapped and are thus not completely represented in the output:\n"
                       f"{overlapping_pairs}")

    # Convert to full-res box
    roi_box = (2**5) * roi_box_s5

    # fetch_synapses_in_batches() requires a box that is 64-px-aligned
    roi_box = round_box(roi_box, 64, 'out')

    logger.info("Fetching synapse points")

    # points_df is a DataFrame with columns for [z,y,x]
    points_df, partners_df = fetch_synapses_in_batches(server, uuid, synapses_instance, roi_box, processes=processes)

    # Append a 'roi_name' column to points_df
    logger.info("Labeling ROI for each point")
    determine_point_rois(server, uuid, rois, points_df, roi_vol_s5, roi_box_s5)

    logger.info("Discarding points that don't overlap with the roi")
    rois = {*rois}
    points_df = points_df.query('roi in @rois').copy()

    columns = ['z', 'y', 'x', 'kind', 'conf', 'roi_label', 'roi']

    if fetch_labels:
        logger.info("Fetching supervoxel under each point")
        svs = fetch_labels_batched(server, uuid, seg_instance,
                                   points_df[['z', 'y', 'x']].values,
                                   supervoxels=True,
                                   processes=processes)

        with Timer("Mapping supervoxels to bodies", logger):
            # Arbitrary heuristic for whether to do the
            # body-lookups on DVID or on the client.
            if len(svs) < 100_000:
                bodies = fetch_mapping(server, uuid, seg_instance, svs)
            else:
                mapping = fetch_mappings(server, uuid, seg_instance)
                mapper = LabelMapper(mapping.index.values, mapping.values)
                bodies = mapper.apply(svs, True)

        points_df['sv'] = svs
        points_df['body'] = bodies
        columns += ['body', 'sv']

    if return_partners:
        # Filter
        #partners_df = partners_df.query('post_id in @points_df.index and pre_id in @points_df.index').copy()

        # Faster filter (via merge)
        partners_df = partners_df.merge(points_df[[]], 'inner', left_on='pre_id', right_index=True)
        partners_df = partners_df.merge(points_df[[]], 'inner', left_on='post_id', right_index=True)
        return points_df[columns], partners_df
    else:
        return points_df[columns]
def determine_bodies_of_interest(server, uuid, synapses_instance, rois=None, min_tbars=2, min_psds=10, processes=16,
                                 *, synapse_table=None, seg_instance=None):
    """
    Determine which bodies fit the given criteria
    for minimum synapse counts WITHIN the given ROIs.

    Note that the min_tbars and min_psds criteria are OR'd together.
    A body need only match at least one of the criteria to be considered "of interest".

    This function is just a convenience wrapper around calling
    fetch_roi_synapses(), fetch_labels_batched(), and body_synapse_counts().

    Note:
        If your synapse table is already loaded and already has a 'body' column,
        and you aren't providing any rois to filter with, then this function is
        merely equivalent to calling body_synapse_counts() and filtering it
        for tbar/psd requirements.

    Args:
        server:
            dvid server

        uuid:
            dvid uuid

        synapses_instance:
            synapses annotation instance name, e.g. 'synapses'
            If you are providing a pre-loaded synapse_table and overriding
            seg_instance, you can set synapses_instance=None.

        rois:
            A list of ROI instance names.  If provided, ONLY synapses within these
            ROIs will be counted when determining bodies of interest.
            If not provided, all synapses in the volume will be counted.

        min_tbars:
            All bodies with at least this many t-bars (PreSyn annotations) will be "of interest".

        min_psds:
            All bodies with at least this many PSDs (PostSyn annotations) will be "of interest".

        processes:
            How many parallel processes to use when fetching synapses and body labels.

        synapse_table:
            If you have a pre-loaded synapse table (or a path to one stored as .npy or .csv),
            you may provide it here, in which case the synapse points won't be fetched from DVID.
            Furthermore, if the table already contains a 'body' column, then it is presumed to be
            accurate and body labels will not be fetched from DVID.

        seg_instance:
            If you want to override the segmentation instance name to use
            (rather than inspecting the synapse instance syncs), provide it here.

    Returns:
        pandas DataFrame, as returned by body_synapse_counts().
        That is, DataFrame with columns: ['PreSyn', 'PostSyn'], indexed by 'body',
        where only bodies of interest are included in the table.
    """
    from neuclease.dvid import fetch_labels_batched, fetch_combined_roi_volume, determine_point_rois

    # Download synapses if necessary
    if synapse_table is None:
        with Timer("Fetching synapse points", logger):
            if rois is None:
                # Fetch all synapses in the volume
                points_df, _partners_df = fetch_synapses_in_batches(server, uuid, synapses_instance, processes=processes)
            else:
                # Fetch only the synapses within the given ROIs
                points_df = fetch_roi_synapses(server, uuid, synapses_instance, rois, False, processes=processes)
    else:
        # User provided a pre-loaded synapse table (or a path to one)
        if isinstance(synapse_table, str):
            with Timer(f"Loading synapse table {synapse_table}", logger):
                _, ext = os.path.splitext(synapse_table)
                assert ext in ('.csv', '.npy')
                if ext == '.csv':
                    synapse_table = load_synapses_csv(synapse_table)
                elif ext == '.npy':
                    synapse_table = load_synapses_npy(synapse_table)

        assert isinstance(synapse_table, pd.DataFrame)
        assert not ({'z', 'y', 'x', 'kind'} - {*synapse_table.columns}), \
            "Synapse table does not contain all expected columns"

        points_df = synapse_table
        if rois:
            roi_vol_s5, roi_box_s5, _ = fetch_combined_roi_volume(server, uuid, rois)
            determine_point_rois(server, uuid, rois, points_df, roi_vol_s5, roi_box_s5)
            points_df = points_df.query('roi_label != 0')

    if 'body' in points_df:
        logger.info("Using user-provided body labels")
    else:
        with Timer("Fetching synapse body labels", logger):
            if seg_instance is None:
                syn_info = fetch_instance_info(server, uuid, synapses_instance)
                seg_instance = syn_info["Base"]["Syncs"][0]

            points_df['body'] = fetch_labels_batched(server, uuid, seg_instance,
                                                     points_df[['z', 'y', 'x']].values,
                                                     processes=processes)

    with Timer("Aggregating body-wise synapse counts"):
        body_synapses_df = body_synapse_counts(points_df)

    body_synapses_df = body_synapses_df.query('PreSyn >= @min_tbars or PostSyn >= @min_psds')
    return body_synapses_df