def _write_block(block_id):
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    # check if we have a mask and, if we do, whether this block
    # contains any pixels in the mask
    if mask is not None:
        m = mask[bb].astype('bool')
        if m.sum() == 0:
            return None
    offset = offsets[block_id]
    # load the data from this block
    d = out[bb]
    if mask is None:
        if with_background:
            d[d != 0] += offset
        else:
            d += offset
        d = nt.take(mapping, d)
    else:
        if with_background:
            m[d == 0] = 0
        values = d[m] + offset
        values = nt.take(mapping, values)
        d[m] = values
    out[bb] = d
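# Hedged illustration (not part of the original code): throughout these functions,
# nt.take(mapping, data) applies a dense 1d relabeling to a label volume; for an
# in-range unsigned-integer mapping it is assumed to behave like numpy fancy
# indexing mapping[data]. A minimal sketch:
def _nt_take_sketch():
    import numpy as np
    import nifty.tools as nt
    mapping = np.array([0, 10, 10, 30], dtype='uint64')   # node id -> segment id
    data = np.array([[0, 1], [2, 3]], dtype='uint64')     # toy label volume of node ids
    relabeled = nt.take(mapping, data)
    assert np.array_equal(relabeled, mapping[data])
    return relabeled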
def run_lmc(merge_boutons, beta=.7):
    path = './test_data.h5'
    graph, feats, sizes, z_edges, boundaries = compute_graph_and_weights(
        path, return_edge_sizes=True)
    costs = compute_edge_costs(feats, edge_sizes=sizes, weighting_scheme='z',
                               z_edge_mask=z_edges, beta=beta)
    costs, lifted_uvs, lifted_costs = _lifted_problem(graph, costs, path)

    print("Running lifted multicut ...")
    node_labels = lifted_multicut_gaec(graph, costs, lifted_uvs, lifted_costs)
    print("... done")

    if merge_boutons:
        node_labels = _merge_bouton_labels(node_labels, path)

    with h5py.File('./test_data.h5', 'r') as f:
        ws = f['watershed'][:]

    import nifty.tools as nt
    seg = nt.take(node_labels, ws)
    return seg, boundaries
def debug_subresult(block_id=1):
    from cremi_tools.viewer.volumina import view
    path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/segmentation/val_block_01.n5'
    tmp_folder = './tmp_plat_val'
    block_prefix = os.path.join(path, 's0', 'sub_graphs', 'block_')
    graph = ndist.Graph(os.path.join(path, 'graph'))

    block_path = block_prefix + str(block_id)
    nodes = ndist.loadNodes(block_path)
    nodes = nodes[1:]
    inner_edges, outer_edges, sub_uvs = graph.extractSubgraphFromNodes(nodes)

    block_res_path = os.path.join(tmp_folder,
                                  'subproblem_results/s0_block%i.npy' % block_id)
    res = np.load(block_res_path)

    merge_edges = np.ones(graph.numberOfEdges, dtype='bool')
    merge_edges[res] = False
    merge_edges[outer_edges] = False

    uv_ids = graph.uvIds()
    n_nodes = int(uv_ids.max()) + 1
    ufd = nufd.ufd(n_nodes)
    ufd.merge(uv_ids[merge_edges])
    node_labels = ufd.elementLabeling()

    ws = z5py.File(path)['volumes/watershed'][:]
    seg = nt.take(node_labels, ws)
    view([ws, seg])
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict -> extract the local dict and apply
    apply_array = False if isinstance(node_labels, dict) else node_labels.ndim == 1
    if apply_array:
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {label: node_labels.get(label, label)
                                   for label in this_labels}
            else:
                this_assignment = {label: node_labels[label]
                                   for label in this_labels}
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {label: this_assignment[ii]
                               for ii, label in enumerate(this_labels)}
        seg = nt.takeDict(this_assignment, seg)
    return seg
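# Hypothetical usage sketch (not from the original code) for _apply_node_labels,
# exercising the three mapping formats it supports on a tiny toy segmentation:
# a dense 1d array, a 2d assignment table of (fragment id, segment id) rows, and a dict.
def _apply_node_labels_sketch():
    import numpy as np
    seg = np.array([[1, 2], [2, 3]], dtype='uint64')
    dense = np.array([0, 5, 5, 7], dtype='uint64')               # 1d array mapping
    table = np.array([[1, 5], [2, 5], [3, 7]], dtype='uint64')   # 2d assignment table
    as_dict = {1: 5, 2: 5, 3: 7}                                 # dict mapping
    res_dense = _apply_node_labels(seg.copy(), dense, allow_empty_assignments=False)
    res_table = _apply_node_labels(seg.copy(), table, allow_empty_assignments=False)
    res_dict = _apply_node_labels(seg.copy(), as_dict, allow_empty_assignments=False)
    assert np.array_equal(res_dense, res_table) and np.array_equal(res_dense, res_dict)
    return res_dense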
def make_new_segmentation(segmentation, blocks, block_coordinates,
                          label_offsets, node_labeling, coordinate_offset):
    # we will write some parts of the volumes multiple times,
    # but that should be ok, because the ids will agree due to the merging
    for block, block_coord, label_offset in zip(blocks, block_coordinates, label_offsets):
        local_begin = tuple(c.start - off for c, off in zip(block_coord, coordinate_offset))
        local_end = tuple(c.stop - off for c, off in zip(block_coord, coordinate_offset))
        roi = tuple(slice(beg, end) for beg, end in zip(local_begin, local_end))
        segmentation[roi] = nt.take(node_labeling, block + label_offset)
def write_ganglion_segmentation():
    seg_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data',
                            '0.6.5/images/local/sbem-6dpf-1-whole-segmented-cells.n5')
    tab_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data',
                            '0.6.6/tables/sbem-6dpf-1-whole-segmented-cells/ganglia_ids.csv')
    tab = pd.read_csv(tab_path, sep='\t')

    seg_key = 'setup0/timepoint0/s0'
    with z5py.File(seg_path) as f:
        ds = f[seg_key]
        max_id = int(ds.attrs['maxId']) + 1

    label_ids = tab['label_id'].values.astype('uint32')
    ganglion_labels = tab['ganglion_id'].values.astype('uint32')
    label_mapping = np.zeros(max_id, 'uint32')
    label_mapping[label_ids] = ganglion_labels

    seg_key = 'setup0/timepoint0/s3'
    out_path = './sbem-6dpf-1-whole-segmented-ganglia.n5'

    print("Reading segmentation ...")
    with z5py.File(seg_path, 'r') as f:
        ds = f[seg_key]
        ds.n_threads = 16
        seg = ds[:].astype('uint32')

    print("To ganglion segmentation ...")
    seg = nt.take(label_mapping, seg)
    seg = seg.astype('int16')
    print(seg.shape)

    print("Writing segmentation ...")
    n_scales = 4
    res = [.2, .16, .16]
    chunks = (128,) * 3
    downscale_factors = n_scales * [[2, 2, 2]]
    make_bdv(seg, out_path, downscale_factors, resolution=res,
             unit='micrometer', chunks=chunks, n_threads=16)

    with z5py.File(out_path) as f:
        ds = f['setup0/timepoint0/s0']
        ds.attrs['maxId'] = max_id
def _write_block_with_offsets(ds_in, ds_out, blocking, block_id, node_labels, offsets):
    fu.log("start processing block %i" % block_id)
    off = offsets[block_id]
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]
    seg[seg != 0] += off
    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        seg = nt.take(node_labels, seg)
    else:
        seg = nt.takeDict(node_labels, seg)
    ds_out[bb] = seg
    fu.log_block_success(block_id)
def check_result(self, node_labels):
    node_labels = self.node_labels
    with z5py.File(self.output_path) as f:
        ds = f[self.output_key]
        ds.n_threads = 8
        res = ds[:]
    with z5py.File(self.input_path) as f:
        ds = f[self.input_key]
        ds.n_threads = 8
        exp = ds[:]
    exp = nt.take(node_labels, exp)
    self.assertEqual(res.shape, exp.shape)
    self.assertTrue(np.array_equal(res, exp))
def trace_from_boutons(threshold):
    path = './test_data.h5'
    with h5py.File(path, 'r') as f:
        bouton_labels = f['bouton_overlaps'][:]

    print("Computing graph and features ...")
    graph, feats = compute_graph_and_weights(path)
    print("... done")

    axon_labels = trace_axons_prototype(graph, feats, bouton_labels, threshold=threshold)

    with h5py.File('./test_data.h5', 'r') as f:
        ws = f['watershed'][:]

    import nifty.tools as nt
    seg = nt.take(axon_labels, ws)
    return seg
def check_results(seg, boundaries):
    import napari
    import nifty.tools as nt

    with h5py.File('./test_data.h5', 'r') as f:
        raw = f['raw'][:]
        # boundaries = f['boundaries'][:]
        ws = f['watershed'][:]
        # boutons = f['boutons_corrected'][:]
        b_overlaps = f['bouton_overlaps'][:]

    overlaps_mapped = nt.take(b_overlaps, ws)

    with napari.gui_qt():
        viewer = napari.Viewer()
        viewer.add_image(raw)
        viewer.add_image(boundaries, visible=False)
        viewer.add_labels(ws, visible=False)
        # viewer.add_labels(boutons, visible=False)
        viewer.add_labels(overlaps_mapped)
        viewer.add_labels(seg)
def _apply_node_labels(seg, node_labels, allow_empty_assignments):
    # choose the appropriate mapping:
    # - 1d np.array -> just apply it
    # - 2d np.array -> extract the local dict and apply
    # - dict -> extract the local dict and apply
    apply_array = False if isinstance(node_labels, dict) else node_labels.ndim == 1
    if apply_array:
        assert seg.max() < len(node_labels), \
            "Max id %i exceeds number of node labels %i" % (seg.max(), len(node_labels))
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = np.unique(seg)
        if isinstance(node_labels, dict):
            # do we allow for assignments that are not in the assignment table?
            if allow_empty_assignments:
                this_assignment = {label: node_labels.get(label, label)
                                   for label in this_labels}
            else:
                this_assignment = {label: node_labels[label]
                                   for label in this_labels}
        else:
            this_assignment = node_labels[:, 1][np.in1d(node_labels[:, 0], this_labels)]
            this_assignment = {label: this_assignment[ii]
                               for ii, label in enumerate(this_labels)}
        # FIXME this casts to uint32, which can lead to nasty overflows
        seg = nt.takeDict(this_assignment, seg)
    return seg
def _write_block(ds_in, ds_out, blocking, block_id, node_labels):
    fu.log("start processing block %i" % block_id)
    block = blocking.getBlock(block_id)
    bb = vu.block_to_bb(block)
    seg = ds_in[bb]
    # check if this block is empty and don't write if it is
    if np.sum(seg != 0) == 0:
        fu.log_block_success(block_id)
        return

    # choose the appropriate function for array or dictionary
    if isinstance(node_labels, np.ndarray):
        # this should actually amount to the same as
        # seg = node_labels[seg]
        seg = nt.take(node_labels, seg)
    else:
        # this copies the dict and hence is extremely RAM hungry,
        # so we make the dict as small as possible
        this_labels = nt.unique(seg)
        this_assignment = {label: node_labels[label] for label in this_labels}
        seg = nt.takeDict(this_assignment, seg)

    ds_out[bb] = seg
    fu.log_block_success(block_id)
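# Hypothetical driver sketch (not from the original code) showing how a per-block
# writer like _write_block above could be dispatched over a blocking of the volume.
# The block_shape default and the serial loop are assumptions for illustration; the
# fu/vu helpers suggest the real blocks are processed by a distributed task framework.
def _write_all_blocks_sketch(ds_in, ds_out, node_labels, block_shape=(64, 64, 64)):
    import nifty.tools as nt
    blocking = nt.blocking([0, 0, 0], list(ds_in.shape), list(block_shape))
    for block_id in range(blocking.numberOfBlocks):
        _write_block(ds_in, ds_out, blocking, block_id, node_labels)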
def debug_costs(block_id=1):
    from cremi_tools.viewer.volumina import view
    from nifty.graph import undirectedGraph

    path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/segmentation/val_block_01.n5'
    costs = z5py.File(path)['costs'][:]
    edges = z5py.File(path)['graph/edges'][:]
    assert len(costs) == len(edges)
    print(np.mean(costs), "+-", np.std(costs))
    print(costs.min(), costs.max())

    # import matplotlib.pyplot as plt
    # n, bins, patches = plt.hist(costs, 50)
    # plt.grid(True)
    # plt.show()

    n_nodes = int(edges.max()) + 1
    graph = undirectedGraph(n_nodes)
    graph.insertEdges(edges)
    assert graph.numberOfEdges == len(costs)
    node_labels = multicut_gaec(graph, costs)

    ds = z5py.File(path)['volumes/watershed']
    ds.n_threads = 8
    ws = ds[:]
    seg = nt.take(node_labels, ws)

    bb = np.s_[25:75, 500:1624, 100:1624]
    input_path = '/g/kreshuk/data/arendt/platyneris_v1/membrane_training_data/validation/predictions/val_block_0%i_unet_lr_v3_ds122.n5' % block_id
    with z5py.File(input_path) as f:
        ds = f['data']
        ds.n_threads = 8
        affs = ds[(slice(0, 3),) + bb]

    view([affs.transpose((1, 2, 3, 0)), ws[bb], seg[bb]])
def make_test_data():
    in_path = '/g/kreshuk/data/cremi/example/sampleA.n5'
    raw_key = 'volumes/raw/s0'
    ws_key = 'volumes/segmentation/watershed'
    seg_key = 'volumes/segmentation/multicut'
    bd_key = 'volumes/boundaries'

    with z5py.File(in_path, 'r') as f:
        ds = f[ws_key]
        ds.n_threads = 8
        halo = [25, 512, 512]
        bb = tuple(slice(sh // 2 - ha, sh // 2 + ha) for sh, ha in zip(ds.shape, halo))
        ws = ds[bb]

        ds = f[seg_key]
        ds.n_threads = 8
        seg = ds[bb]

        ds = f[bd_key]
        ds.n_threads = 8
        bd = ds[bb]

        ds = f[raw_key]
        ds.n_threads = 8
        raw = ds[bb]
        chunks = ds.chunks

    print("Run ccs ...")
    ws = vigra.analysis.labelVolumeWithBackground(ws.astype('uint32')).astype('uint64')
    seg = vigra.analysis.labelVolumeWithBackground(seg.astype('uint32')).astype('uint64')
    print("ccs done")

    node_labels = max_overlaps(ws, seg)
    unique_labels = np.unique(node_labels)

    # introduce artificial merges by mapping several ids onto the same label
    n_merge = 50
    merge_to = 3
    has_merge = []
    node_labels_merged = node_labels.copy()
    for ii in range(n_merge):
        merge_id = np.random.choice(unique_labels)
        while merge_id in has_merge:
            merge_id = np.random.choice(unique_labels)
        for jj in range(merge_to):
            merge_to_id = np.random.choice(unique_labels)
            while merge_to_id in has_merge:
                merge_to_id = np.random.choice(unique_labels)
            node_labels_merged[node_labels_merged == merge_to_id] = merge_id
        unique_labels = np.unique(node_labels_merged)
        has_merge.append(merge_id)

    seg_merged = nt.take(node_labels_merged, ws)
    assert seg_merged.shape == seg.shape

    print("Write outputs")
    out_path = './test_data.n5'
    out_raw_key = 'volumes/raw/s0'
    with z5py.File(out_path, 'a') as f:
        ds = f.create_dataset('volumes/seg/ref', data=seg,
                              chunks=chunks, compression='gzip')
        ds.attrs['maxId'] = int(seg.max())
        ds = f.create_dataset('volumes/seg/merged', data=seg_merged,
                              chunks=chunks, compression='gzip')
        ds.attrs['maxId'] = int(seg_merged.max())
        ds = f.create_dataset('volumes/ws', data=ws,
                              chunks=chunks, compression='gzip')
        ds.attrs['maxId'] = int(ws.max())
        f.create_dataset('node_labels/ref', data=node_labels, compression='gzip')
        f.create_dataset('node_labels/merged', data=node_labels_merged, compression='gzip')
        f.create_dataset('volumes/boundaries', data=bd, compression='gzip', chunks=chunks)
        f.create_dataset(out_raw_key, data=raw, compression='gzip', chunks=chunks)

    print("Make paintera dataset")
    # make the paintera dataset
    tmp_paintera = './tmp_paintera'
    scale_factors = [[1, 2, 2], [1, 2, 2]]
    halos = [[1, 2, 2], [1, 2, 2]]
    target = 'local'
    max_jobs = 16
    downscale(out_path, out_raw_key, 'volumes/raw', scale_factors, halos,
              tmp_paintera, target, max_jobs)
    convert_to_paintera_format(out_path, 'volumes/raw', 'volumes/ws', 'volumes/paintera',
                               label_scale=0, resolution=[1, 1, 1],
                               tmp_folder=tmp_paintera, target=target,
                               max_jobs=max_jobs, max_threads=max_jobs,
                               assignment_path=out_path, assignment_key='node_labels/merged',
                               convert_to_label_multisets=True, restrict_sets=[-1, -1])