def _check_result(self):
    # read the input and compute counts and unique ids
    with z5py.File(self.input_path) as f:
        seg = f[self.input_key][:]
    ids, counts = np.unique(seg, return_counts=True)

    with z5py.File(self.output_path) as f:
        res = f[self.output_key][:]

    # check correctness for ids and counts / sizes
    self.assertEqual(len(res), len(ids))
    self.assertTrue(np.allclose(ids, res[:, 0]))
    self.assertTrue(np.allclose(counts, res[:, 1]))

    # check correctness for center of mass
    coms = np.zeros((len(ids), 3))
    for label_id in ids:
        coords = np.where(seg == label_id)
        com = [np.mean(coord) for coord in coords]
        coms[label_id] = com
    self.assertTrue(np.allclose(coms, res[:, 2:5]))
def _check_result(self, mode, check_for_equality=True, threshold=.5):
    with z5py.File(self.output_path) as f:
        res = f[self.output_key][:]
    with z5py.File(self.input_path) as f:
        inp = f[self.input_key][:]

    if mode == 'greater':
        expected = label(inp > threshold)
    elif mode == 'less':
        expected = label(inp < threshold)
    elif mode == 'equal':
        expected = label(inp == threshold)
    self.assertEqual(res.shape, expected.shape)

    # from cremi_tools.viewer.volumina import view
    # print("view for mode:", mode)
    # view([inp, res, expected], ['input', 'result', 'expected'])

    if check_for_equality:
        score = adjusted_rand_score(expected.ravel(), res.ravel())
        self.assertAlmostEqual(score, 1., places=4)
def seg_and_ids(self, n_ids):
    with z5py.File(self.input_path) as f:
        ds = f[self.seg_key]
        ds.n_threads = self.max_jobs
        seg = ds[:]

    # pick n_ids random ids, skipping the first (background) id
    # and ids of objects below the size threshold
    ids, counts = np.unique(seg, return_counts=True)
    ids, counts = ids[1:], counts[1:]
    ids = ids[counts > self.size_thresh]
    ids = np.random.choice(ids, n_ids)
    return seg, ids
def setUp(self):
    assert os.path.exists(self.path)
    assert os.path.exists(os.path.join(self.path, self.labels_key))
    self.shape = z5py.File(self.path)[self.labels_key].shape

    self.graph_path = './tmpdir/graph.n5'
    self.block_shape = [25, 256, 256]
    self.blocking = nifty.tools.blocking(roiBegin=[0, 0, 0],
                                         roiEnd=list(self.shape),
                                         blockShape=list(self.block_shape))
    if not os.path.exists('tmpdir'):
        os.mkdir('tmpdir')
    self.compute_graph()
def _make_labels(self, dependency):
    # check if we have output labels already
    dst_key = os.path.join(self.label_out_key, 'data', 's0')
    with z5py.File(self.path) as f:
        assert self.label_in_key in f, "key %s not in input file" % self.label_in_key
        if dst_key in f:
            return dependency

    # we make the label output group
    with z5py.File(self.path) as f:
        g = f.require_group(self.label_out_key)
        dgroup = g.require_group('data')
        # resolve relative paths and links
        data_path = os.path.abspath(os.path.realpath(dgroup.path))

    # if we use label-multisets, we need to create the label multiset for this scale;
    # otherwise, we just make a symlink from the input dataset to the output dataset
    return self._make_label_multiset(dependency) if self.use_label_multiset \
        else self._link_labels(data_path, dependency)
def test_read_zarr_irregular(self):
    shape = (123, 97)
    chunks = (17, 32)
    data = np.random.rand(*shape)

    # write the data with zarr ...
    fz = zarr.open(self.path)
    fz.create_dataset('test', data=data, chunks=chunks)

    # ... and read it back with z5py
    f = z5py.File(self.path)
    out = f['test'][:]
    self.assertEqual(data.shape, out.shape)
    self.assertTrue(np.allclose(data, out))
def features_example(shebang, with_filters=False):
    input_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    labels_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    graph_path = '/home/cpape/Work/data/isbi2012/cluster_example/graph.n5'
    output_path = '/home/cpape/Work/data/isbi2012/cluster_example/features.n5'
    input_key = 'volumes/affinities'
    labels_key = 'volumes/watersheds'

    tmp_folder = './tmp'
    config_folder = './configs'
    max_jobs = 8

    global_conf = BlockEdgeFeaturesLocal.default_global_config()
    global_conf.update({'shebang': shebang,
                        'block_shape': [10, 256, 256]})
    with open('./configs/global.config', 'w') as f:
        json.dump(global_conf, f)

    task_config = BlockEdgeFeaturesLocal.default_task_config()
    if with_filters:
        task_config.update({'filters': ['gaussianSmoothing', 'laplacianOfGaussian'],
                            'sigmas': [1., 2., 4.],
                            'apply_in_2d': True})
    else:
        task_config.update({'offsets': NEAREST_OFFSETS})
    with open('./configs/block_edge_features.config', 'w') as f:
        json.dump(task_config, f)

    ret = luigi.build([EdgeFeaturesWorkflow(input_path=input_path,
                                            input_key=input_key,
                                            labels_path=labels_path,
                                            labels_key=labels_key,
                                            graph_path=graph_path,
                                            graph_key='graph',
                                            output_path=output_path,
                                            output_key='features',
                                            config_dir=config_folder,
                                            tmp_folder=tmp_folder,
                                            target='local',
                                            max_jobs=max_jobs,
                                            max_jobs_merge=1)],
                      local_scheduler=True)
    if ret:
        features = z5py.File(output_path)['features'][:]
        print(features.shape)
        # make sure that no feature column is trivial
        for j in range(features.shape[1]):
            assert np.mean(features[:, j]) != 0
            assert np.std(features[:, j]) != 0
def test_retry(self):
    task = FailingTaskLocal
    ret = luigi.build([task(output_path=self.output_path,
                            output_key=self.output_key,
                            shape=self.shape,
                            config_dir=self.config_folder,
                            tmp_folder=self.tmp_folder,
                            max_jobs=self.max_jobs)],
                      local_scheduler=True)
    self.assertTrue(ret)
    with z5py.File(self.output_path) as f:
        data = f[self.output_key][:]
    self.assertTrue(np.allclose(data, 1))
def check_result(self, seg_key):
    # check shapes
    with z5py.File(self.input_path) as f:
        seg = f[seg_key]
        seg.n_threads = 8
        seg = seg[:]
        shape = seg.shape
    with z5py.File(self.output_path) as f:
        shape_ = tuple(f[self.graph_key].attrs['shape'])
    self.assertEqual(shape, shape_)

    # check graph
    # compute nifty rag
    rag = nrag.gridRag(seg, numberOfLabels=int(seg.max()) + 1)
    # load the graph
    graph = ndist.Graph(self.output_path, self.output_key)

    self.assertEqual(rag.numberOfNodes, graph.numberOfNodes)
    self.assertEqual(rag.numberOfEdges, graph.numberOfEdges)
    self.assertTrue(np.array_equal(rag.uvIds(), graph.uvIds()))
def multicut_step2(out_path, node_labeling_key, n_scales,
                   tmp_folder, agglomerator_key):
    t0 = time.time()
    last_scale = n_scales
    agglomerator = AGGLOMERATORS[agglomerator_key]

    f_graph = z5py.File(os.path.join(tmp_folder, 'merged_graph.n5/s%i' % last_scale))
    n_nodes = f_graph.attrs['numberOfNodes']
    uv_ids = f_graph['edges'][:]
    initial_node_labeling = f_graph['nodeLabeling'][:]
    n_edges = len(uv_ids)

    # get the costs
    costs = f_graph['costs'][:]
    assert len(costs) == n_edges, "%i, %i" % (len(costs), n_edges)

    graph = nifty.graph.undirectedGraph(n_nodes)
    graph.insertEdges(uv_ids)
    node_labeling = agglomerator(graph, costs)

    # get the labeling of initial nodes
    if initial_node_labeling is None:
        new_initial_node_labeling = node_labeling
    else:
        # should this ever become a bottleneck, we can parallelize this in nifty
        # but for now this would really be premature optimization
        new_initial_node_labeling = node_labeling[initial_node_labeling]

    f_out = z5py.File(out_path, use_zarr_format=False)
    node_shape = (len(new_initial_node_labeling),)
    chunks = (min(len(new_initial_node_labeling), 524288),)
    ds_nodes = f_out.create_dataset(node_labeling_key,
                                    dtype='uint64',
                                    shape=node_shape,
                                    chunks=chunks)
    ds_nodes[:] = new_initial_node_labeling

    print("Success")
    print("In %f s" % (time.time() - t0,))
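# A hypothetical driver sketch for multicut_step2 (not from the original code):
# it assumes the scale-wise reduction has already written
# '<tmp_folder>/merged_graph.n5/s<n_scales>' and that 'multicut-kl' is a valid
# key in AGGLOMERATORS; all paths and the key are placeholders.
def example_multicut_step2():
    multicut_step2(out_path='./results.n5',
                   node_labeling_key='node_labeling',
                   n_scales=2,
                   tmp_folder='./tmp',
                   agglomerator_key='multicut-kl')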
def check_exported(with_seg=False, with_boundaries=False, scale=3):
    from heimdall import view, to_source
    from elf.wrapper.resized_volume import ResizedVolume

    path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.n5'
    key = 'setup0/timepoint0/s%i' % (scale + 1,)
    f = z5py.File(path, 'r')
    ds = f[key]
    ds.n_threads = 8
    raw = ds[:]
    shape = raw.shape
    data = [to_source(raw, name='raw')]

    path = './data.n5'
    key = 'volumes/segmentation2/s%i' % scale
    f = z5py.File(path, 'r')
    if with_seg:
        ds = f[key]
        ds.n_threads = 8
        seg = ds[:].astype('uint32')
        data.append(to_source(seg, name='segmentation'))

    key = 'volumes/clustering'
    ds = f[key]
    ds.n_threads = 8
    clustered = ResizedVolume(ds[:], shape=shape)[:]
    data.append(to_source(clustered, name='clustered'))

    path = '/g/arendt/EM_6dpf_segmentation/corrections_and_proofreading/data.n5'
    key = 'boundaries/s%i' % scale
    f = z5py.File(path, 'r')
    if with_boundaries:
        ds = f[key]
        ds.n_threads = 8
        bd = ds[:]
        # append instead of overwriting the source list, so the other layers are kept
        data.append(to_source(bd, name='boundaries'))

    view(*data)
def __init__(
    self,
    syn_file,
    cleft_cc_ds,
    seg_file,
    seg_ds,
    tgt_file,
    raw_file=None,
    raw_ds=None,
    offset=(0.0, 0.0, 0.0),
    num_cores=10,
    safe_mem=False,
):
    self.synf = z5py.File(syn_file, use_zarr_format=False)
    self.segf = z5py.File(seg_file, use_zarr_format=False)
    self.cleft_cc = self.synf[cleft_cc_ds]
    self.cleft_cc_np = self.synf[cleft_cc_ds][:]
    self.seg = self.segf[seg_ds]
    self.partners = None
    self.num_cores = num_cores
    # inputs = np.unique(self.cleft_cc[:])[1:]
    # self.list_of_clefts = Parallel(n_jobs=self.num_cores)(
    #     delayed(Cleft.__init__)(Cleft.__new__(Cleft), self, cid) for cid in inputs)
    print("finding all clefts...")
    try:
        self.list_of_cleftids = range(1, self.cleft_cc.attrs["max_id"] + 1)
    except AssertionError:
        self.list_of_cleftids = np.unique(self.cleft_cc[:])[1:]
    self.list_of_clefts = [
        Cleft(self, cid, safe_mem=safe_mem) for cid in self.list_of_cleftids
    ]
    self.cremi_file = cremi.CremiFile(tgt_file, "w")
    self.offset = offset
    if raw_file is not None:
        self.rawf = z5py.File(raw_file, use_zarr_format=False)
        self.raw = self.rawf[raw_ds]
    else:
        self.rawf = None
        self.raw = None
def subsolutions(shebang):
    input_path = '/home/cpape/Work/data/isbi2012/isbi2012_train_volume.h5'
    aff_path = '/home/cpape/Work/data/isbi2012/cluster_example/isbi_train.n5'
    example_path = './isbi_exp.n5'
    max_jobs = 8

    configs = SubSolutionsWorkflow.get_config()
    global_conf = configs['global']
    global_conf.update({'shebang': shebang,
                        'block_shape': (25, 256, 256)})
    with open('./configs/global.config', 'w') as f:
        json.dump(global_conf, f)

    ret = luigi.build([SubSolutionsWorkflow(ws_path=example_path,
                                            ws_key='volumes/watersheds',
                                            problem_path=example_path,
                                            output_path=example_path,
                                            output_key='volumes/sub_results',
                                            n_scales=0,
                                            config_dir='./configs',
                                            tmp_folder='./tmp',
                                            target='local',
                                            max_jobs=max_jobs)],
                      local_scheduler=True)

    # viewing the results is disabled; remove this line to inspect them
    ret = False
    if ret:
        from cremi_tools.viewer.volumina import view
        with h5py.File(input_path) as f:
            raw = f['volumes/raw'][:]

        with z5py.File(aff_path) as f:
            affs = f['volumes/affinities'][:3].transpose((1, 2, 3, 0))

        with z5py.File(example_path) as f:
            ws = f['volumes/watersheds'][:]
            seg = f['volumes/segmentation'][:]
            subseg = f['volumes/sub_results'][:]

        data = [raw, affs, ws, subseg, seg]
        labels = ['raw', 'affs', 'ws', 'sub-segmentations', 'segmentation']
        view(data, labels)
def predict_dataset(ds_name, checkpoint_path,
                    output_path, output_key, gpu_ids,
                    block_shape=(96, 128, 128),
                    halo=(16, 32, 32)):
    input_path = f'../../data/{ds_name}/images/local/fibsem-raw.n5'
    input_key = 'setup0/timepoint0/s1'

    # we have 3 semantic channels:
    # r, g and r+g
    n_channels = 3

    # get rid of the first channel in the predictions, which is not interesting,
    # then cast to uint8 to save disc space
    def postprocess(predictions):
        assert predictions.shape[0] == 4, f"{predictions.shape}"
        return to_uint8(predictions[1:])

    with z5py.File(input_path, 'r') as f_in, z5py.File(output_path, 'a') as f_out:
        ds_in = f_in[input_key]
        out_shape = (n_channels,) + ds_in.shape
        ds_out = f_out.require_dataset(output_key, shape=out_shape,
                                       compression='gzip', dtype='uint8',
                                       chunks=(1,) + block_shape)
        predict_with_halo(ds_in, checkpoint_path, gpu_ids, block_shape, halo,
                          use_best=True, output=ds_out,
                          preprocess=normalize, postprocess=postprocess)
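# A hypothetical usage sketch for predict_dataset (not from the original code):
# the dataset name, checkpoint location, output path and gpu ids are placeholders.
def example_predict_dataset():
    predict_dataset(ds_name='example-dataset',
                    checkpoint_path='./checkpoints/semantic-model',
                    output_path='./predictions.n5',
                    output_key='predictions/semantic',
                    gpu_ids=[0, 1])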
def single_gpu_inference(sample, gpu, iteration):
    path = '/nrs/saalfeld/lauritzen/%s/workspace.n5/raw' % sample
    assert os.path.exists(path), path
    rf = z5py.File(path, use_zarr_format=False)
    shape = rf['gray'].shape
    weight_meta_graph = '/nrs/saalfeld/heinrichl/synapses/cremi_all_0116_01/unet_checkpoint_%i' % iteration
    inference_meta_graph = '/nrs/saalfeld/heinrichl/synapses/cremi_all_0116_01/unet_inference'
    net_io_json = '/nrs/saalfeld/heinrichl/synapses/cremi_all_0116_01/net_io_names.json'
    out_file = '/nrs/saalfeld/heinrichl/test/lauritzen/%s/workspace.n5' % sample
    with open(net_io_json, 'r') as f:
        net_io_names = json.load(f)

    mhash = hash(path)
    offset_file = '/nrs/saalfeld/heinrichl/synapses/scott_offsets_{0:}_DTU2_inf/offsets_{' \
                  '1:}/list_gpu_{2:}.json'.format(sample, mhash, gpu)
    with open(offset_file, 'r') as f:
        offset_list = json.load(f)

    input_key = net_io_names["raw"]
    output_key = net_io_names["dist"]
    input_shape = (91, 862, 862)
    output_shape = (71, 650, 650)
    prediction = TensorflowPredict(weight_meta_graph,
                                   inference_meta_graph,
                                   input_key=input_key,
                                   output_key=output_key)
    t_predict = time.time()
    run_inference_n5(prediction,
                     preprocess,
                     partial(threshold_cc, thr=0., output_shape=output_shape, ds_shape=shape),
                     path,
                     out_file,
                     offset_list,
                     input_shape_wc=input_shape,
                     output_shape_wc=output_shape,
                     target_keys=('syncleft_dist_DTU-2_{0:}'.format(iteration),
                                  'syncleft_cc_DTU-2_{0:}'.format(iteration)),
                     input_key='gray',
                     log_processed=os.path.join(os.path.dirname(offset_file),
                                                'list_gpu_{0:}_{1:}_processed.txt'.format(gpu, iteration)))
    t_predict = time.time() - t_predict

    with open(os.path.join(os.path.dirname(offset_file),
                           't-inf_gpu_{0:}_{1:}.txt'.format(gpu, iteration)), 'w') as f:
        f.write("Inference with gpu %i in %f s\n" % (gpu, t_predict))
def test_rechunk_default(self):
    from z5py.util import rechunk
    in_path = os.path.join(self.tmp_dir, 'in.n5')
    out_path = os.path.join(self.tmp_dir, 'out.n5')

    # create input file
    in_file = z5py.File(in_path, use_zarr_format=False)
    ds_in = in_file.create_dataset('data', dtype='float32',
                                   shape=self.shape, chunks=self.chunks,
                                   compression='gzip')
    # write test data
    data = np.arange(ds_in.size).reshape(ds_in.shape).astype(ds_in.dtype)
    ds_in[:] = data

    # rechunk for different out blocks
    out_file = z5py.File(out_path, use_zarr_format=False)
    new_chunks = (20, 20, 20)
    # NOTE we can only choose out blocks that align with the chunks,
    # because otherwise we run into issues since the blocking is not thread-safe
    for out_blocks in (None, (40, 40, 40), (60, 60, 60)):
        ds_str = 'none' if out_blocks is None else '_'.join(map(str, out_blocks))
        ds_name = 'data_%s' % ds_str
        rechunk(in_path, out_path,
                'data', ds_name,
                new_chunks, out_blocks=out_blocks,
                n_threads=8)
        # make sure that the new data agrees
        ds_out = out_file[ds_name]
        data_out = ds_out[:]
        self.assertEqual(data_out.shape, data.shape)
        self.assertEqual(ds_out.chunks, new_chunks)
        self.assertTrue(np.allclose(data, data_out))
def test_remove_dataset(self):
    from z5py.util import remove_dataset
    path = './tmp_dir/data.n5'
    f = z5py.File(path)
    shape = (100, 100)
    chunks = (10, 10)
    ds = f.create_dataset('data', dtype='float64',
                          data=np.ones(shape), chunks=chunks)
    remove_dataset(ds, 4)
    self.assertFalse(os.path.exists(os.path.join(path, 'data')))
def main_cell2_crop1():
    orig = h5py.File(
        '/groups/hess/hess_collaborators/Annotations/BigCat Annotations/HeLa_Cell2_Crop1_Periphery'
        '/Cell2_Crop1_1012x1012x612+6210-31+344.h5',
        'r')
    target = z5py.File(
        '/groups/saalfeld/saalfeldlab/larissa/data/cell/hela_cell2_crop1_{0:}.n5'.format(
            datetime.date.today().strftime('%m%d%y')),
        use_zarr_format=False)
    # mapping = np.array([0, 9, 8, 10, 4, 2, 1, 1, 5, 11, 12, 14, 6, 7, 3, 13])
    mapping = np.array([0, 4, 3, 10, 16, 2, 1, 1, 17, 11, 8, 26, 18, 19, 29, 9])
    ribos = False
    main(orig, target, mapping, ribos)
def debug_preprocessing_seg():
    from heimdall import view, to_source
    p_seg = './data.n5'
    k_seg = 'volumes/segmentation'

    # load a central bounding box of the segmentation
    halo = [50, 512, 512]
    with z5py.File(p_seg, 'r') as f:
        ds = f[k_seg]
        ds.n_threads = 8
        shape = ds.shape
        center = [sh // 2 for sh in shape]
        bb = tuple(slice(ce - ha, ce + ha) for ce, ha in zip(center, halo))
        seg = ds[bb]

    p_raw = '../../../data/rawdata/sbem-6dpf-1-whole-raw.n5'
    k_raw = 'setup0/timepoint0/s1'
    with z5py.File(p_raw, 'r') as f:
        ds = f[k_raw]
        ds.n_threads = 8
        print(ds.shape)
        raw = ds[bb]

    view(to_source(raw), to_source(seg))
def generate_zarr_format(compressors=['gzip', 'blosc', 'zlib', 'raw']):
    path = 'data/z5py.zr'
    im = astronaut()

    f = z5py.File(path, mode='w')
    for compressor in compressors:
        copts = COMPRESSION_OPTIONS.get(compressor, {})
        name = (compressor if compressor != "blosc"
                else "%s/%s" % (compressor, copts.get("codec")))
        f.create_dataset(name, data=im, compression=compressor,
                         chunks=CHUNKS, **copts)
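# A minimal read-back sketch (an assumption, not part of the original code):
# it reads one of the datasets written by generate_zarr_format, assuming the
# default compressor list so that a 'gzip' dataset exists at the path above.
def read_zarr_format_example():
    f = z5py.File('data/z5py.zr', mode='r')
    im = f['gzip'][:]
    print(im.shape, im.dtype)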
def _check_result(self, trafo):
    f_in = z5py.File(self.input_path)
    ds_in = f_in[self.input_key]
    ds_in.n_threads = 8
    exp = ds_in[:]

    # a global affine transformation is given as {'a': a, 'b': b};
    # otherwise we expect one transformation per slice
    if len(trafo) == 2:
        a, b = trafo['a'], trafo['b']
        exp = a * exp + b
    else:
        self.assertEqual(len(trafo), len(exp))
        for z in range(len(exp)):
            a, b = trafo[z]['a'], trafo[z]['b']
            exp[z] = a * exp[z] + b

    f_out = z5py.File(self.output_path)
    ds_out = f_out[self.output_key]
    ds_out.n_threads = 8
    res = ds_out[:]

    self.assertEqual(exp.shape, res.shape)
    self.assertTrue(np.allclose(exp, res))
def smooth_distance_stats_bfs(distances, window=2):
    skeleton_folder = '/home/papec/mnt/nrs/lauritzen/02/workspace.n5/skeletons'
    skeleton_ids = os.listdir(skeleton_folder)
    smoothed_distances = {}
    for skel_id in skeleton_ids:
        if not skel_id.isdigit():
            continue
        skel_path = os.path.join(skeleton_folder, skel_id)
        edges = z5py.File(skel_path)['edges'][:]
        skel_id = int(skel_id)
        smoothed_distances[skel_id] = smooth_bfs(distances[skel_id], edges, window)
    return smoothed_distances
def vi_scores(self):
    f = z5py.File(self.input_path)
    ds = f[self.seg_key]
    ds.n_threads = 8
    seg = ds[:]

    ds = f[self.gt_key]
    ds.n_threads = 8
    gt = ds[:]

    scores = val.object_vi(seg, gt, ignore_gt=[0])
    return scores
def metrics(self):
    f = z5py.File(self.input_path)
    ds = f[self.seg_key]
    ds.n_threads = 8
    seg = ds[:]

    ds = f[self.gt_key]
    ds.n_threads = 8
    gt = ds[:]

    vis, vim, ri, _ = val.cremi_score(seg, gt, ignore_gt=[0])
    return vis, vim, ri
def _run_skel_wf(self, format_, max_jobs):
    task = SkeletonWorkflow(tmp_folder=self.tmp_folder,
                            config_dir=self.config_folder,
                            target=self.target, max_jobs=max_jobs,
                            input_path=self.path,
                            input_prefix=self.input_prefix,
                            output_path=self.output_path,
                            output_prefix=self.output_prefix,
                            work_scale=0, skeleton_format=format_)
    ret = luigi.build([task], local_scheduler=True)
    self.assertTrue(ret)

    f = z5py.File(self.output_path)
    self.assertTrue(self.output_prefix in f)
    out_key = os.path.join(self.output_prefix, 's0')
    self.assertTrue(out_key in f)
def setUp(self):
    if not os.path.exists('./tmp'):
        os.mkdir('./tmp')

    # make segmentation data
    seg = np.zeros((100, 100, 100), dtype='uint64')
    seg[:50] = 1
    seg[50:, 50:, 50:] = 2
    seg[:50, :50, :50] = 3
    f_seg = z5py.File('./tmp/seg.n5', use_zarr_format=False)
    ds = f_seg.create_dataset('seg', shape=seg.shape,
                              chunks=(20, 20, 20), dtype=seg.dtype)
    ds[:] = seg

    # make skeletons
    f_skels = z5py.File('./tmp/skels.n5', use_zarr_format=False)
    g = f_skels.require_group('skels')

    # skeleton 1: only in label 3
    skel1 = np.array([[0, 0, 0, 0],
                      [1, 10, 10, 10],
                      [2, 15, 15, 15],
                      [3, 20, 20, 20],
                      [4, 25, 25, 25],
                      [5, 35, 35, 35]], dtype='uint64')
    g1 = g.create_group('1')
    c1 = g1.create_dataset('coordinates', shape=skel1.shape,
                           chunks=skel1.shape, dtype='uint64')
    c1[:] = skel1

    # skeleton 2: in label 0 and 2
    skel2 = np.array([[0, 60, 0, 0],
                      [1, 70, 10, 10],
                      [2, 75, 25, 25],
                      [3, 75, 45, 45],
                      [4, 80, 55, 55],
                      [5, 85, 65, 65]], dtype='uint64')
    g2 = g.create_group('2')
    c2 = g2.create_dataset('coordinates', shape=skel2.shape,
                           chunks=skel2.shape, dtype='uint64')
    c2[:] = skel2
def find_my_block(block_id):
    from heimdall import view, to_source
    scale = 5
    rpath = '../../../data/rawdata/sbem-6dpf-1-whole-raw.n5'
    k = 'setup0/timepoint0/s%i' % scale
    f = z5py.File(rpath)
    ds = f[k]
    ds.n_threads = 8
    raw = ds[:]

    path = './data.n5'
    k = 'volumes/clustering'
    f = z5py.File(path)
    ds = f[k]
    ds.n_threads = 8
    block = ds[:].astype('uint32')
    if block_id is not None:
        block = (block == block_id).astype('uint32')

    view(to_source(raw, name='raw'), to_source(block, name='block-volume'))
def extract_scale_level(scale):
    assert scale >= 6
    sys.path.append('/home/papec/Work/my_projects/z5/bld/python')
    import z5py
    path = '/home/papec/mnt/saalfeldlab/FAFB00/v14_align_tps_20170818_dmg.n5/volumes/raw'
    key = 's%i' % scale
    data = z5py.File(path, use_zarr_format=False)[key][:]
    out_path = '/home/papec/Work/neurodata_hdd/fafb/raw.h5'
    vigra.writeHDF5(data, out_path, key, compression='gzip', chunks=(64, 64, 64))
def read_n5(data, chunk, compression, save_folder, iterations):
    key = '%s_%s' % ('_'.join(str(cc) for cc in chunk), compression)
    save_path = os.path.join(save_folder, 'n5_%s.n5' % key)
    read_times = []
    for _ in range(iterations):
        t_read = time.time()
        f_out = z5py.File(save_path)
        ds = f_out['data']
        data_read = ds[:]
        t_read = time.time() - t_read
        read_times.append(t_read)
        assert np.allclose(data, data_read)
    return read_times
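# A sketch of the matching write step that creates the 'data' dataset which
# read_n5 expects; the key layout mirrors read_n5 above, but this helper is an
# assumption for illustration, not part of the original benchmark code.
def write_n5(data, chunk, compression, save_folder):
    key = '%s_%s' % ('_'.join(str(cc) for cc in chunk), compression)
    save_path = os.path.join(save_folder, 'n5_%s.n5' % key)
    f = z5py.File(save_path)
    if 'data' not in f:
        f.create_dataset('data', data=data, chunks=chunk, compression=compression)
    return save_path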
def threshold(path, threshold, sigma):
    in_key = 'boundaries'
    out_key = 'thresholded'
    with z5py.File(path) as f:
        if out_key in f:
            return
        data = f[in_key][:]
        data = gaussian(data, sigma)
        data = (data > threshold).astype('uint8')
        f.create_dataset(out_key, data=data, chunks=CHUNKS, compression='gzip')
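# Hypothetical usage (not from the original code): smooth and threshold the
# 'boundaries' dataset in a local n5 container; path and values are placeholders.
def example_threshold():
    threshold('./data.n5', threshold=.5, sigma=2.)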