def test_labelindex(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindex', 'test_labelindex')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Write some random data
    sv = 99
    vol = sv * np.random.randint(2, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    # Compute labelindex table from scratch
    rows = []
    for block_coord in ndrange(offset, offset + vol.shape, (64, 64, 64)):
        block_coord = np.array(block_coord)
        block_box = np.array((block_coord, block_coord + 64))
        block = extract_subvol(vol, block_box - offset)

        count = (block == sv).sum()
        rows.append([*block_coord, sv, count])

    index_df = pd.DataFrame(rows, columns=['z', 'y', 'x', 'sv', 'count'])

    # Check DVID's generated labelindex table against expected
    labelindex_tuple = fetch_labelindex(*instance_info, sv, format='pandas')
    assert labelindex_tuple.label == sv

    labelindex_tuple.blocks.sort_values(['z', 'y', 'x', 'sv'], inplace=True)
    labelindex_tuple.blocks.reset_index(drop=True, inplace=True)
    assert (labelindex_tuple.blocks == index_df).all().all()

    # Check our protobuf against DVID's
    index_tuple = PandasLabelIndex(index_df, sv, 1, datetime.datetime.now().isoformat(), 'someuser')
    labelindex = create_labelindex(index_tuple)

    # Since labelindex block entries are not required to be sorted,
    # dvid might return them in a different order.
    # Hence this comparison function, which sorts them first.
    def compare_proto_blocks(left, right):
        left_blocks = sorted(left.blocks.items())
        right_blocks = sorted(right.blocks.items())
        return left_blocks == right_blocks

    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)

    # Check post/get roundtrip
    post_labelindex(*instance_info, sv, labelindex)
    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)
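# Illustration only (not part of the original tests): the ndrange() loop in
# test_labelindex() visits every 64-px-aligned block corner inside the posted volume.
# The helper below is a plain-Python sketch of that iteration, assuming ndrange()
# behaves like a multi-dimensional range with a step; the helper name is hypothetical.
def _example_block_corners(offset=(64, 64, 64), shape=(128, 128, 128), step=(64, 64, 64)):
    from itertools import product
    return list(product(*(range(o, o + s, st) for o, s, st in zip(offset, shape, step))))

# Example: len(_example_block_corners()) == 8 for the 128^3 volume posted above
# (2 blocks along each of z, y, x).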
def test_fetch_labelindices(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindices', 'test_labelindices')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Write some random data
    vol = np.random.randint(1, 10, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    labelindices = fetch_labelindices(*instance_info, list(range(1, 10)))
    for sv, li in zip(range(1, 10), labelindices.indices):
        # This function is already tested elsewhere, so we'll use it as a reference
        li2 = fetch_labelindex(*instance_info, sv)
        assert li == li2

    labelindices = fetch_labelindices(*instance_info, list(range(1, 10)), format='list-of-protobuf')
    for sv, li in zip(range(1, 10), labelindices):
        # This function is already tested elsewhere, so we'll use it as a reference
        li2 = fetch_labelindex(*instance_info, sv)
        assert li == li2

    labelindices = fetch_labelindices(*instance_info, list(range(1, 10)), format='pandas')
    for sv, li in zip(range(1, 10), labelindices):
        # This function is already tested elsewhere, so we'll use it as a reference
        li2 = fetch_labelindex(*instance_info, sv, format='pandas')
        li_df = li.blocks.sort_values(['z', 'y', 'x']).reset_index(drop=True)
        li2_df = li2.blocks.sort_values(['z', 'y', 'x']).reset_index(drop=True)
        assert (li_df == li2_df).all().all()

    # Test the copy function (just do a round-trip -- hopefully I didn't swap src and dest anywhere...)
    copy_labelindices(instance_info, instance_info, list(range(1, 10)), batch_size=2)
    copy_labelindices(instance_info, instance_info, list(range(1, 10)), batch_size=2, processes=2)
def fetch_brick_coords(body, supervoxel_subset):
    """
    Fetch the block coordinates for the given body,
    filter them for the given supervoxels (if any),
    and convert the block coordinates to brick coordinates.
    """
    assert is_supervoxels or supervoxel_subset is None
    try:
        with mgr.access_context(server, True, 1, 1):
            labelindex = fetch_labelindex(server, uuid, instance, body, 'protobuf')
        coords_df = convert_labelindex_to_pandas(labelindex).blocks
    except HTTPError as ex:
        if (ex.response is not None and ex.response.status_code == 404):
            return (body, None)
        raise
    except RuntimeError as ex:
        if 'does not map to any body' in str(ex):
            return (body, None)
        raise

    if len(coords_df) == 0:
        return (body, None)

    if is_supervoxels:
        supervoxel_subset = set(supervoxel_subset)
        coords_df = coords_df.query('sv in @supervoxel_subset').copy()

    coords_df[['z', 'y', 'x']] //= brick_shape
    coords_df['body'] = np.uint64(body)
    coords_df.drop_duplicates(inplace=True)
    return (body, coords_df)
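# Hypothetical usage sketch (not from the original code): fetch_brick_coords() is written
# as a closure, so it assumes `server`, `uuid`, `instance`, `is_supervoxels`, `brick_shape`,
# and the resource-manager client `mgr` are defined in the enclosing scope.  One plausible
# way to drive it over many bodies is a thread pool that collects the per-body tables;
# the helper name and arguments below are illustrative assumptions.
def fetch_all_brick_coords(bodies, supervoxel_subset=None, threads=8):
    from concurrent.futures import ThreadPoolExecutor
    with ThreadPoolExecutor(threads) as pool:
        results = list(pool.map(lambda body: fetch_brick_coords(body, supervoxel_subset), bodies))

    # Bodies with no labelindex (404 / unmapped) come back as (body, None); skip them.
    tables = [df for _body, df in results if df is not None]
    if not tables:
        return None
    return pd.concat(tables, ignore_index=True)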
def _repair_index(master_seg, body):
    pli = fetch_labelindex(*master_seg, body, format='pandas')

    # Just drop the blocks below coordinate 1024
    # (That's where the bad blocks were added, and
    # there isn't supposed to be segmentation in that region.)
    pli.blocks.query('z >= 1024 and y >= 1024 and x >= 1024', inplace=True)

    li = create_labelindex(pli)
    post_labelindex(*master_seg, pli.label, li)
def process_batch(self, batch_and_rowcount):
    """
    Takes a batch of grouped stats rows and sends it to dvid in the
    appropriate protobuf format.  If self.check_mismatches is True,
    read the existing labelindex for each body first, and post only
    the indexes that are missing or differ from the new ones.
    """
    next_stats_batch, next_stats_batch_total_rows = batch_and_rowcount

    labelindex_batch = chain(*map(self.label_indexes_for_body, next_stats_batch))

    if not self.check_mismatches:
        post_labelindex_batch(*self.instance_info, labelindex_batch)
        return next_stats_batch_total_rows, [], []

    # Check for mismatches
    mismatch_batch = []
    missing_batch = []
    for labelindex in labelindex_batch:
        try:
            existing_labelindex = fetch_labelindex(*self.instance_info, labelindex.label)
        except requests.RequestException as ex:
            missing_batch.append(labelindex)
            if not str(ex.response.status_code).startswith('4'):
                logger.warning(f"Failed to fetch LabelIndex for label: {labelindex.label} due to error {ex.response.status_code}")
        else:
            if labelindex.blocks != existing_labelindex.blocks:
                # Update the mut_id to match the previous one.
                labelindex.last_mutid = existing_labelindex.last_mutid
                mismatch_batch.append(labelindex)

    # Post mismatches (only)
    post_labelindex_batch(*self.instance_info, mismatch_batch + missing_batch)

    # Return mismatch IDs
    mismatch_labels = [labelindex.label for labelindex in mismatch_batch]
    missing_labels = [labelindex.label for labelindex in missing_batch]

    return next_stats_batch_total_rows, mismatch_labels, missing_labels
def process_batch(self, batch_and_rowcount):
    """
    Takes a batch of grouped stats rows and sends it to dvid in the
    appropriate protobuf format.  If self.check_mismatches is True,
    read the existing labelindex for each body first, and post only
    the indexes that are missing or differ from the new ones.
    """
    next_stats_batch, next_stats_batch_total_rows = batch_and_rowcount

    labelindex_batch = chain(*map(self.label_indexes_for_body, next_stats_batch))

    if not self.check_mismatches:
        self.post_labelindex_batch(labelindex_batch)
        return next_stats_batch_total_rows, [], []

    # Check for mismatches
    mismatch_batch = []
    missing_batch = []
    for labelindex in labelindex_batch:
        try:
            existing_labelindex = fetch_labelindex(*self.instance_info, labelindex.label, session=self.session)
        except requests.RequestException as ex:
            missing_batch.append(labelindex)
            if ex.response is None:
                logger.warning(f"Failed to fetch LabelIndex for label: {labelindex.label} due to no response")
            elif not str(ex.response.status_code).startswith('4'):
                logger.warning(f"Failed to fetch LabelIndex for label: {labelindex.label} due to error {ex.response.status_code}")
        else:
            if labelindex.blocks != existing_labelindex.blocks:
                # Update the mut_id to match the previous one.
                labelindex.last_mutid = existing_labelindex.last_mutid
                mismatch_batch.append(labelindex)

    # Post mismatches (only)
    self.post_labelindex_batch(mismatch_batch + missing_batch)

    # Return mismatch IDs
    mismatch_labels = [labelindex.label for labelindex in mismatch_batch]
    missing_labels = [labelindex.label for labelindex in missing_batch]

    return next_stats_batch_total_rows, mismatch_labels, missing_labels
def test_masksegmentation_basic(setup_dvid_segmentation_input, invert_mask, roi_dilation, disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, roi_mask_s5, input_segmentation_name, output_segmentation_name = setup_dvid_segmentation_input

    if invert_mask:
        roi_mask_s5 = ~roi_mask_s5

    config["masksegmentation"]["invert-mask"] = invert_mask
    config["masksegmentation"]["dilate-roi"] = roi_dilation

    # re-dump config
    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    input_box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    input_box_zyx = input_box_xyz[:, ::-1]

    roi_mask = upsample(roi_mask_s5, 2**5)
    roi_mask = extract_subvol(roi_mask, input_box_zyx)

    expected_vol = extract_subvol(volume.copy(), input_box_zyx)
    expected_vol[roi_mask] = 0

    output_box_xyz = np.array(final_config['output']['geometry']['bounding-box'])
    output_box_zyx = output_box_xyz[:, ::-1]
    output_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                       output_box_zyx, scale=0, supervoxels=True)

    # Create a copy of the volume that contains only the voxels we removed
    erased_vol = volume.copy()
    erased_vol[~roi_mask] = 0

    if EXPORT_DEBUG_FILES:
        original_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, input_segmentation_name,
                                             output_box_zyx, scale=0, supervoxels=True)
        original_agglo_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, input_segmentation_name,
                                                   output_box_zyx, scale=0)
        output_agglo_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                                 output_box_zyx, scale=0)
        np.save('/tmp/original-svs.npy', original_vol)
        np.save('/tmp/original-agglo.npy', original_agglo_vol)
        np.save('/tmp/output.npy', output_vol)
        np.save('/tmp/output-agglo.npy', output_agglo_vol)
        np.save('/tmp/expected.npy', expected_vol)
        np.save('/tmp/erased.npy', erased_vol)

        shutil.copyfile(f'{execution_dir}/roi-mask.h5', '/tmp/roi-mask.h5')
        if roi_dilation:
            shutil.copyfile(f'{execution_dir}/dilated-roi-mask.h5', '/tmp/dilated-roi-mask.h5')
        if invert_mask:
            shutil.copyfile(f'{execution_dir}/segmentation-mask.h5', '/tmp/segmentation-mask.h5')
        shutil.copyfile(f'{execution_dir}/final-mask.h5', '/tmp/final-mask.h5')

    if roi_dilation > 0:
        # FIXME: We don't yet verify voxel-accuracy of ROI dilation.
        return

    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"

    scaled_expected_vol = expected_vol
    for scale in range(1, 1 + MAX_SCALE):
        scaled_expected_vol = downsample(scaled_expected_vol, 2, 'labels-numba')
        scaled_output_vol = fetch_labelmap_voxels(dvid_address, repo_uuid, output_segmentation_name,
                                                  output_box_zyx // 2**scale, scale=scale, supervoxels=True)

        if EXPORT_DEBUG_FILES:
            np.save(f'/tmp/expected-{scale}.npy', scaled_expected_vol)
            np.save(f'/tmp/output-{scale}.npy', scaled_output_vol)

        if scale <= 5:
            assert (scaled_output_vol == scaled_expected_vol).all(), \
                f"Written vol does not match expected at scale {scale}"
        else:
            # For scale 6 and 7, some blocks are not even changed,
            # but that means we would be comparing DVID's label
            # downsampling method to our method ('labels-numba').
            # The two don't necessarily give identical results in the case of 'ties',
            # so we'll just verify that the nonzero voxels match, at least.
            assert ((scaled_output_vol == 0) == (scaled_expected_vol == 0)).all(), \
                f"Written vol does not match expected at scale {scale}"

    block_stats_path = f'{execution_dir}/erased-block-statistics.h5'
    with h5py.File(block_stats_path, 'r') as f:
        stats_df = pd.DataFrame(f['stats'][:])

    #
    # Check the exported block statistics
    #
    stats_cols = [*BLOCK_STATS_DTYPES.keys()]
    assert stats_df.columns.tolist() == stats_cols
    stats_df = stats_df.sort_values(stats_cols).reset_index(drop=True)

    expected_stats_df = block_stats_for_volume((64, 64, 64), erased_vol, input_box_zyx)
    expected_stats_df = expected_stats_df.sort_values(stats_cols).reset_index(drop=True)

    assert len(stats_df) == len(expected_stats_df)
    assert (stats_df == expected_stats_df).all().all()

    #
    # Try updating the labelindexes
    #
    src_info = (dvid_address, repo_uuid, input_segmentation_name)
    dest_info = (dvid_address, repo_uuid, output_segmentation_name)
    with switch_cwd(execution_dir):
        erase_from_labelindexes(src_info, dest_info, block_stats_path, batch_size=10, threads=4)

    # Verify deleted supervoxels
    assert os.path.exists(f'{execution_dir}/deleted-supervoxels.csv')
    deleted_svs = set(pd.read_csv(f'{execution_dir}/deleted-supervoxels.csv')['sv'])

    orig_svs = {*pd.unique(volume.reshape(-1))} - {0}
    remaining_svs = {*pd.unique(expected_vol.reshape(-1))} - {0}
    expected_deleted_svs = orig_svs - remaining_svs
    assert deleted_svs == expected_deleted_svs

    # Verify remaining sizes
    expected_sv_counts = (pd.Series(expected_vol.reshape(-1), name='sv')
                            .value_counts()
                            .drop(0)
                            .sort_index()
                            .rename('count'))

    index_dfs = []
    for body in np.unique(fetch_mapping(*dest_info, remaining_svs)):
        index_df = fetch_labelindex(*dest_info, body, format='pandas').blocks
        index_dfs.append(index_df)

    sv_counts = (pd.concat(index_dfs, ignore_index=True)[['sv', 'count']]
                   .groupby('sv')['count']
                   .sum()
                   .sort_index())
    assert set(sv_counts.index.values) == set(expected_sv_counts.index.values)
    assert (sv_counts == expected_sv_counts).all(), \
        pd.DataFrame({'stored_count': sv_counts, 'expected_count': expected_sv_counts}).query('stored_count != expected_count')

    # Verify mapping
    # Deleted supervoxels exist in the mapping, but they map to 0.
    assert (fetch_mapping(*dest_info, [*deleted_svs]) == 0).all()

    # Remaining supervoxels still map to their original bodies
    assert (fetch_mapping(*dest_info, [*remaining_svs]) == fetch_mapping(*src_info, [*remaining_svs])).all()
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('instance')
    parser.add_argument('block_stats')
    args = parser.parse_args()

    seg_instance = (args.server, args.uuid, args.instance)

    from flyemflows.bin.ingest_label_indexes import load_stats_h5_to_records
    with Timer("Loading block stats", logger):
        (block_sv_stats, _presorted_by, _agglo_path) = load_stats_h5_to_records(args.block_stats)
        stats_df = pd.DataFrame(block_sv_stats)
        stats_df = stats_df[['z', 'y', 'x', 'segment_id', 'count']]
        stats_df = stats_df.rename(columns={'segment_id': 'sv'})

        # Keep only the new supervoxels.
        stats_df = stats_df.query('sv > @NEW_SV_THRESHOLD').copy()

    with Timer("Fetching old labelindex", logger):
        labelindex = fetch_labelindex(*seg_instance, 106979579, format='protobuf')

    with Timer("Extracting labelindex table", logger):
        old_df = convert_labelindex_to_pandas(labelindex).blocks

    with Timer("Patching labelindex table", logger):
        # Discard old supervoxel stats within the patched area
        in_patch = (old_df[['z', 'y', 'x']].values >= PATCH_BOX_ZYX[0]).all(axis=1)
        in_patch &= (old_df[['z', 'y', 'x']].values < PATCH_BOX_ZYX[1]).all(axis=1)

        old_df['in_patch'] = in_patch
        unpatched_df = old_df.query('not (in_patch and sv == @FRANKENBODY_SV)').copy()
        del unpatched_df['in_patch']

        # Append new stats
        new_df = pd.concat((unpatched_df, stats_df), ignore_index=True)
        new_df = new_df.sort_values(['z', 'y', 'x', 'sv'])

        np.save('old_df.npy', old_df.to_records(index=False))
        np.save('new_df.npy', new_df.to_records(index=False))

        if old_df['count'].sum() != new_df['count'].sum():
            logger.warning("Old and new indexes do not have the same total counts. See old_df.npy and new_df.npy")

    with Timer("Constructing new labelindex", logger):
        last_mutid = fetch_repo_info(*seg_instance[:2])["MutationID"]
        mod_time = datetime.datetime.now().isoformat()
        new_li = PandasLabelIndex(new_df, FRANKENBODY_SV, last_mutid, mod_time, os.environ.get("USER", "unknown"))
        new_labelindex = create_labelindex(new_li)

    with Timer("Posting new labelindex", logger):
        post_labelindex(*seg_instance, FRANKENBODY_SV, new_labelindex)

    with Timer("Posting updated mapping", logger):
        new_mapping = pd.Series(FRANKENBODY_SV, index=new_df['sv'].unique(), dtype=np.uint64, name='body')
        post_mappings(*seg_instance, new_mapping, last_mutid)

    logger.info("DONE")
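# Toy illustration (not part of the script above) of the "in patch" box test used when
# patching the labelindex table: a block corner is inside the patch box iff it is >= the
# box minimum and < the box maximum in every dimension.  The coordinates and box below
# are made up; the real PATCH_BOX_ZYX is defined elsewhere in the script.
def _example_in_patch():
    patch_box_zyx = np.array([(0, 0, 0), (128, 128, 128)])
    df = pd.DataFrame({'z': [64, 256], 'y': [64, 256], 'x': [64, 256], 'sv': [1, 1]})

    in_patch = (df[['z', 'y', 'x']].values >= patch_box_zyx[0]).all(axis=1)
    in_patch &= (df[['z', 'y', 'x']].values < patch_box_zyx[1]).all(axis=1)
    return in_patch  # array([ True, False ]) -- only the first corner falls inside the box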
def process_batch(self, batch_and_rowcount):
    """
    Given a batch of ERASED block stats, fetches the existing LabelIndex,
    subtracts the erased stats, and posts either an updated labelindex or
    a tombstone (if the body is completely erased).
    """
    next_stats_batch, next_stats_batch_total_rows = batch_and_rowcount

    batch_indexes = []
    missing_bodies = []
    unexpected_dfs = []
    all_deleted_svs = []
    for body_group in next_stats_batch:
        body_id = body_group[0]['body_id']

        try:
            old_index = fetch_labelindex(*self.src_info, body_id, format='pandas')
        except requests.RequestException as ex:
            missing_bodies.append(body_id)
            if not str(ex.response.status_code).startswith('4'):
                logger.warning(f"Failed to fetch LabelIndex for label: {body_id} due to error {ex.response.status_code}")
            continue

        old_df = old_index.blocks
        erased_df = pd.DataFrame(body_group).rename(columns={'segment_id': 'sv'})[['z', 'y', 'x', 'sv', 'count']]
        assert erased_df.columns.tolist() == old_df.columns.tolist()
        assert old_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0
        assert erased_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0

        # Find the rows that exist on the old side (or both)
        merged_df = old_df.merge(erased_df, 'outer', on=['z', 'y', 'x', 'sv'],
                                 suffixes=['_old', '_erased'], indicator='side')
        merged_df['count_old'] = merged_df['count_old'].fillna(0).astype(np.uint32)
        merged_df['count_erased'] = merged_df['count_erased'].fillna(0).astype(np.uint32)

        # If some supervoxel was "erased" from a particular block and the original
        # labelindex didn't mention it, that's a sign of corruption.
        # Save it for subsequent analysis.
        unexpected_df = merged_df.query('count_old == 0').copy()
        if len(unexpected_df) > 0:
            unexpected_df['body'] = body_id
            unexpected_dfs.append(unexpected_df)

        merged_df = merged_df.query('count_old > 0').copy()
        merged_df['count'] = merged_df['count_old'] - merged_df['count_erased']

        new_df = merged_df[['z', 'y', 'x', 'sv', 'count']]
        new_df = new_df.query('count > 0').copy()

        deleted_svs = set(old_df['sv']) - set(new_df['sv'])
        if deleted_svs:
            deleted_svs = np.fromiter(deleted_svs, dtype=np.uint64)
            all_deleted_svs.append(deleted_svs)

        if len(new_df) == 0:
            # Nothing to keep.  Make a tombstone.
            tombstone_index = LabelIndex()
            tombstone_index.label = body_id
            tombstone_index.last_mutid = self.last_mutid
            tombstone_index.last_mod_user = self.user
            tombstone_index.last_mod_time = self.mod_time
            batch_indexes.append(tombstone_index)
        else:
            pli = PandasLabelIndex(new_df, body_id, self.last_mutid, self.mod_time, self.user)
            new_labelindex = create_labelindex(pli)
            batch_indexes.append(new_labelindex)

    # Write entire batch to DVID
    post_labelindex_batch(*self.dest_info, batch_indexes)

    # Return missing body IDs and the set of unexpected rows
    if unexpected_dfs:
        unexpected_df = pd.concat(unexpected_dfs)
    else:
        unexpected_df = None

    if all_deleted_svs:
        all_deleted_svs = np.concatenate(all_deleted_svs)

    return next_stats_batch_total_rows, missing_bodies, unexpected_df, all_deleted_svs
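# Toy illustration (not part of the batch processor above) of the outer-merge bookkeeping
# used to subtract erased voxel counts from the stored labelindex.  The coordinates and
# counts below are made up, and the helper name is hypothetical.
def _example_erase_merge():
    old_df = pd.DataFrame({'z': [0, 0], 'y': [0, 0], 'x': [0, 64],
                           'sv': [7, 7], 'count': [100, 50]})
    erased_df = pd.DataFrame({'z': [0], 'y': [0], 'x': [0],
                              'sv': [7], 'count': [40]})

    merged_df = old_df.merge(erased_df, 'outer', on=['z', 'y', 'x', 'sv'],
                             suffixes=['_old', '_erased'], indicator='side')
    merged_df['count_old'] = merged_df['count_old'].fillna(0).astype(np.uint32)
    merged_df['count_erased'] = merged_df['count_erased'].fillna(0).astype(np.uint32)
    merged_df['count'] = merged_df['count_old'] - merged_df['count_erased']

    # Blocks that still contain voxels of the supervoxel keep a positive count;
    # fully erased blocks simply drop out of the new index.
    return merged_df.query('count > 0')[['z', 'y', 'x', 'sv', 'count']]
    # -> rows (0, 0, 0, 7, 60) and (0, 0, 64, 7, 50)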