def test_aniso(view=False):
    pmap = vigra.readHDF5('./test_data/anisotropic/pmap.h5', 'data')
    ws_aniso_dt, n_labels_aniso = ws_anisotropic_distance_transform(pmap, 0.4, 10., 2.)
    check_consecutive(ws_aniso_dt)
    assert n_labels_aniso == ws_aniso_dt.max() + 1
    print "Anisotropic distance transform watershed done"

    res_dt = []
    res_gray = []
    for n_threads in (1, 4):
        ws_dt, n_labels_dt = ws_distance_transform_2d_stacked(pmap, 0.4, 2., n_threads=n_threads)
        check_consecutive(ws_dt)
        assert n_labels_dt == ws_dt.max() + 1, "%i, %i" % (n_labels_dt, ws_dt.max() + 1)
        res_dt.append(n_labels_dt)
        print "Distance transform watershed done"

        ws_gray, n_labels_gray = ws_grayscale_distance_transform_2d_stacked(pmap, 0.1, 2., n_threads=n_threads)
        check_consecutive(ws_gray)
        assert n_labels_gray == ws_gray.max() + 1
        res_gray.append(n_labels_gray)
        print "Grayscale distance transform watershed done"

    assert res_dt[0] == res_dt[1]
    assert res_gray[0] == res_gray[1]

    if view:
        raw = vigra.readHDF5('./test_data/anisotropic/raw.h5', 'data')
        volumina_n_layer([raw, pmap, ws_aniso_dt, ws_dt, ws_gray],
                         ['raw', 'pmap', 'ws_aniso_dt', 'ws_dt', 'ws_gray'])
def learn_rf():
    import cremi_tools.segmentation as cseg
    raw_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/raw_train_normalized.h5'
    pmap_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/probabilities_train.h5'
    assert os.path.exists(pmap_path), pmap_path
    ws_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/overseg_train.h5'
    assert os.path.exists(ws_path), ws_path

    # load pmap and watersheds
    raw = vigra.readHDF5(raw_path, 'data').astype('float32')
    pmap = vigra.readHDF5(pmap_path, 'data')
    ws = vigra.readHDF5(ws_path, 'data').astype('uint64')
    assert ws.shape == pmap.shape

    # construct the region adjacency graph
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max() + 1))

    # feature extractor and multicut
    feature_extractor = cseg.FeatureExtractor(True)
    features = feature_extractor(rag, pmap, ws, raw)

    gt_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/gt_train.h5'
    gt = vigra.readHDF5(gt_path, 'data')
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    uv_ids = rag.uvIds()
    labels = node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]
    assert len(labels) == len(features), "%i, %i" % (len(labels), len(features))

    print("learning rf from features", features.shape)
    rf = RandomForestClassifier(n_jobs=40, n_estimators=500)
    rf.fit(features, labels)
    with open('./rf.pkl', 'wb') as f:
        pickle.dump(rf, f)
def get_target(ds_str="sopnetcompare_train"):
    assert ds_str in ("sopnetcompare_train",), ds_str  # TODO more datasets!!!
    print "Loading Features and Labels for:", ds_str

    labelpath = '/home/constantin/Work/data_hdd/data_110915/sopnet_comparison/processed/facelabs/facelabs_mitooff.h5'
    ffeatpath = '/home/constantin/Work/data_hdd/cache/cached_datasets/sopnetcompare_train/features/ffeats/ffeat_bert_0_True.h5'

    feats = np.nan_to_num(vigra.readHDF5(ffeatpath, 'data'))

    import h5py
    lab_file = h5py.File(labelpath)
    key = lab_file.keys()[0]
    lab_file.close()

    labels = np.array(vigra.readHDF5(labelpath, key))

    feats = feats[labels != 0.5]
    labels = labels[labels != 0.5]
    labels = labels[:, np.newaxis]

    assert all(np.unique(labels) == np.array([0, 1]))
    assert labels.shape[0] == feats.shape[0]

    labels = np.squeeze(labels)
    return (feats, labels)
def regression_test_cremi(samples):
    # run all multicuts
    for ds_test in samples:
        train_inputs = './cremi_inputs/%s/train_files.json' % ds_test
        assert os.path.exists(train_inputs), train_inputs
        test_inputs = './cremi_inputs/%s/test_files.json' % ds_test
        assert os.path.exists(test_inputs), test_inputs
        subprocess.call(['python', 'learn.py', train_inputs])
        subprocess.call(['python', 'mc.py', test_inputs])

    print "Eval Cremi"
    for ds_test in samples:
        vi_split_ref, vi_merge_ref, adapted_ri_ref = reference_values_mc[ds_test]
        test_inputs = './cremi_inputs/%s/test_files.json' % ds_test
        assert os.path.exists(test_inputs), test_inputs
        with open(test_inputs) as f:
            in_files = json.load(f)
        gt_p = in_files['gt']
        mc_p = os.path.join(in_files['cache'], 'MulticutSegmentation.h5')
        gt = vigra.readHDF5(gt_p, 'data')
        mc_seg = vigra.readHDF5(mc_p, 'data')
        print "Regression Test MC for %s..." % ds_test
        regression_test(gt, mc_seg, vi_split_ref, vi_merge_ref, adapted_ri_ref)
def compare_caches_train(cache_folder_1, cache_folder_2):
    files_1 = os.listdir(cache_folder_1)
    files_2 = os.listdir(cache_folder_2)

    # compare edge labels
    edge_labels_1 = [ff for ff in files_1 if ff.startswith('edge_gt')]
    assert len(edge_labels_1) == 1
    labels_1 = vigra.readHDF5(os.path.join(cache_folder_1, edge_labels_1[0]), 'data')
    edge_labels_2 = [ff for ff in files_2 if ff.startswith('edge_gt')]
    assert len(edge_labels_2) == 1
    labels_2 = vigra.readHDF5(os.path.join(cache_folder_2, edge_labels_2[0]), 'data')
    assert np.allclose(labels_1, labels_2), "%i / %i" % (np.sum(np.isclose(labels_1, labels_2)), len(labels_1))
    print "Passed labels check"

    # compare filters
    compare_caches_filters(cache_folder_1 + '/filters/filters_2d/inp_0',
                           cache_folder_2 + '/filters/filters_2d/inp_0')
    compare_caches_filters(cache_folder_1 + '/filters/filters_2d/inp_1',
                           cache_folder_2 + '/filters/filters_2d/inp_1')
    print "Passed filters check"

    # compare features
    compate_caches_feats(os.path.join(cache_folder_1, 'features'),
                         os.path.join(cache_folder_2, 'features'))
def view(filepaths, filekeys, names=None, types=None, swapaxes=None, crop=None):
    inputs = []
    this_type = None
    swp = None
    if crop is None:
        crop = np.s_[:, :, :]
    for idx, filepath in enumerate(filepaths):
        if types is not None:
            this_type = types[idx]
        if swapaxes is not None:
            swp = swapaxes[idx]
        if this_type is not None:
            inputs.append(vigra.readHDF5(filepath, filekeys[idx]).astype(this_type)[crop])
        else:
            inputs.append(vigra.readHDF5(filepath, filekeys[idx])[crop])
        if swp is not None:
            inputs[-1] = inputs[-1].swapaxes(*swp)
        print inputs[-1].shape

    inputs[0][inputs[0] != 25] = 0
    # inputs[0][inputs[0] == 25] = 1
    volumina_viewer.volumina_n_layer(inputs, names)
def get_data_x(self, data_name="train"):
    """Returns the desired data as a ready-to-use n x d sample (n instances with d features).

    :param data_name: name of the data, either "train" or "test"
    :return: data
    """
    if not data_name in ["train", "test"]:
        raise Exception('LPData.get_data_x(): Parameter data_name must be either "train" or "test".')
    if data_name == "train":
        file_names = self.feature_file_names_train
    elif data_name == "test":
        file_names = self.feature_file_names_test
    if len(file_names) == 0:
        raise Exception("LPData.get_data_x(): There is no data that can be returned.")

    # Load the first feature to get the number of instances.
    d = vigra.readHDF5(file_names[0], self.feat_h5_key).flatten()
    data = numpy.zeros((d.shape[0], len(file_names)))
    data[:, 0] = d

    # Load the other features.
    for i, file_name in enumerate(file_names[1:]):
        data[:, i + 1] = vigra.readHDF5(file_name, self.feat_h5_key).flatten()
    return data
def segment_block(block_id, weight_edges=False, cached=False):
    import cremi_tools.segmentation as cseg
    raw_path = '/home/papec/Work/neurodata_hdd/fib25/raw/raw_block%i.h5' % block_id
    pmap_path = '/home/papec/Work/neurodata_hdd/fib25/pmaps/probs_squeezed_block%i.h5' % block_id
    ws_path = '/home/papec/Work/neurodata_hdd/fib25/watersheds/watershed_agglomerated_0.075000_block%i.h5' % block_id

    # load pmap and watersheds
    raw = vigra.readHDF5(raw_path, 'data').astype('float32')
    pmap = vigra.readHDF5(pmap_path, 'data')
    ws = vigra.readHDF5(ws_path, 'data')

    if cached:
        edge_probs = vigra.readHDF5('edge_probs_%i.h5' % block_id, 'data')
        rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1)
        # TODO edge sizes
    else:
        # feature extractor and multicut
        feature_extractor = cseg.RandomForestFeatures('./rf.pkl', True)
        # make graph and costs
        rag, edge_probs, _, edge_sizes = feature_extractor(pmap, ws, raw=raw)
        vigra.writeHDF5(edge_probs, 'edge_probs_%i.h5' % block_id, 'data')

    graph = nifty.graph.undirectedGraph(rag.numberOfNodes)
    graph.insertEdges(rag.uvIds())

    mc = cseg.Multicut('kernighan-lin', weight_edges=weight_edges)
    if weight_edges:
        costs = mc.probabilities_to_costs(edge_probs, edge_sizes)
    else:
        costs = mc.probabilities_to_costs(edge_probs)
    node_labels = mc(graph, costs)
    return nrag.projectScalarNodeDataToPixels(rag, node_labels)
def load_data(labels=None):
    """
    Load the data sets.

    :param labels: list with the labels that should be used
    :return: train_x, train_y, test_x, test_y
    """
    # Load the data.
    train_x = numpy.array(vigra.readHDF5("data/mnist/train.h5", "data").transpose())
    train_y = vigra.readHDF5("data/mnist/train.h5", "labels")
    test_x = numpy.array(vigra.readHDF5("data/mnist/test.h5", "data").transpose())
    test_y = vigra.readHDF5("data/mnist/test.h5", "labels")

    # Reduce the data to the given labels.
    if labels is not None:
        train_indices = numpy.array([i for i, t in enumerate(train_y) if t in labels])
        train_x = train_x[train_indices]
        train_y = train_y[train_indices]
        test_indices = numpy.array([i for i, t in enumerate(test_y) if t in labels])
        test_x = test_x[test_indices]
        test_y = test_y[test_indices]

    return train_x, train_y, test_x, test_y
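
# Hedged usage sketch (not part of the original script): restrict MNIST to two
# digit classes before training. Assumes the relative HDF5 paths used in
# load_data() exist.
if __name__ == "__main__":
    train_x, train_y, test_x, test_y = load_data(labels=[3, 8])
    print("train shape: %s, test shape: %s" % (str(train_x.shape), str(test_x.shape)))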
def get_data_y(self, data_name="train", data_type="gt"):
    """Returns the desired ground truth labels.

    :param data_name: name of the data, either "train" or "test"
    :param data_type: type of the ground truth, either "gt" or "dists"
    :return:
    """
    if not data_name in ["train", "test"]:
        raise Exception('LPData.get_data_y(): Parameter data_name must be either "train" or "test".')
    if not data_type in ["gt", "dists"]:
        raise Exception('LPData.get_data_y(): Parameter data_type must be either "gt" or "dists".')

    # Load the desired data.
    if data_name == "train" and data_type == "gt":
        return vigra.readHDF5(self.gt_train_path, self.gt_train_key).flatten()
    if data_name == "train" and data_type == "dists":
        return vigra.readHDF5(self.dists_train_path, self.dists_h5_key).flatten()
    if data_name == "test" and data_type == "gt":
        return vigra.readHDF5(self.gt_test_path, self.gt_test_key).flatten()
    if data_name == "test" and data_type == "dists":
        return vigra.readHDF5(self.dists_test_path, self.dists_h5_key).flatten()
    raise Exception("LPData.get_data_y(): Congratulations, you have reached unreachable code.")
def eval_all(res):
    from NeuroMetrics import Metrics
    m = Metrics()

    res = vigra.readHDF5(res, "data").astype(np.uint32)
    gt = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/neuroproof_data/gt_test.h5",
        "data").astype(np.uint32)

    m.computeContingencyTable(gt.ravel(), res.ravel())

    #print "RI", m.randIndex()
    #print "VI", m.variationOfInformation()

    print
    #print "RandScore:", m.randScore()
    print "RandRecall:", m.randRecall()
    print "RandPrecision:", m.randPrecision()

    print
    #print "ViScore:", m.viScore()
    print "ViRecall:", m.viRecall()
    print "ViPrecision:", m.viPrecision()
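
# Hedged usage sketch: evaluate a stored segmentation against the hard-coded
# neuroproof ground truth; the result path below is only a placeholder.
if __name__ == "__main__":
    eval_all("./result_segmentation.h5")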
def load_neuro_data():
    """
    Load the neuro dataset.

    :return: train_x, train_y, test_x, test_y
    """
    # Load the data.
    train_x = vigra.readHDF5("data/neuro/train/ffeat_br_segid0.h5", "ffeat_br")
    train_y = numpy.array(vigra.readHDF5("data/neuro/train/gt_face_segid0.h5", "gt_face")[:, 0])
    test_x = vigra.readHDF5("data/neuro/test/ffeat_br_segid0.h5", "ffeat_br")
    test_y = numpy.array(vigra.readHDF5("data/neuro/test/gt_face_segid0.h5", "gt_face")[:, 0])
    assert train_x.shape[0] == train_y.shape[0]
    assert test_x.shape[0] == test_y.shape[0]
    assert train_x.shape[1] == test_x.shape[1]

    # Remove NaN values.
    to_remove = numpy.where(numpy.isnan(train_x))
    train_x = numpy.delete(train_x, to_remove, axis=0)
    train_y = numpy.delete(train_y, to_remove)
    to_remove = numpy.where(numpy.isnan(test_x))
    test_x = numpy.delete(test_x, to_remove, axis=0)
    test_y = numpy.delete(test_y, to_remove)

    return train_x, train_y, test_x, test_y
def eval_block(block_id, res_prefix):
    gt = Volume(vigra.readHDF5('/home/papec/Work/neurodata_hdd/fib25/gt/gt_block%i.h5' % block_id, 'data'))
    res = Volume(vigra.readHDF5('%s_%i.h5' % (res_prefix, block_id), 'data'))
    metrics = NeuronIds(gt)
    are = metrics.adapted_rand(res)
    vi_s, vi_m = metrics.voi(res)
    return are, vi_s, vi_m
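
# Hedged usage sketch (block ids and the result prefix are placeholders):
# evaluate a few blocks and print the per-block scores.
if __name__ == "__main__":
    for block_id in (1, 2, 3):
        are, vi_s, vi_m = eval_block(block_id, './segmentation_block')
        print("block %i: adapted-rand %.4f, vi-split %.4f, vi-merge %.4f"
              % (block_id, are, vi_s, vi_m))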
def regression_test_nproof(cache_folder, data_folder):
    # if the cache does not exist, create it
    if not os.path.exists(os.path.join(cache_folder, 'nproof_train')):
        meta = init(cache_folder, data_folder, 'nproof')
    else:
        meta = MetaSet(cache_folder)
        meta.load()

    # isbi params
    params = ExperimentSettings()
    params.rf_cache_folder = os.path.join(cache_folder, "rf_cache")
    params.use_2d = False
    params.anisotropy_factor = 1.
    params.ignore_mask = False
    params.n_trees = 500
    params.solver = "multicut_fusionmoves"
    params.lifted_neighborhood = 2

    local_feats_list = ("raw", "prob", "reg", "topo")
    lifted_feats_list = ("cluster", "reg")

    ds_train = meta.get_dataset('nproof_train')
    ds_test = meta.get_dataset('nproof_test')

    mc_seg = run_mc(ds_train, ds_test, local_feats_list, params)
    lmc_seg = run_lmc(ds_train, ds_test, local_feats_list, lifted_feats_list, params, 2.)

    print "Regression Test MC..."
    # Eval differences with same parameters and according regression thresholds
    # vi-split: 0.31985479849 -> 0.35
    vi_split_ref = 0.35
    # vi-merge: 0.402968960935 -> 0.45
    vi_merge_ref = 0.45
    # adapted-ri: 0.122123986224 -> 0.15
    adapted_ri_ref = 0.15
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'gt_test.h5'), 'data'),
        mc_seg,
        vi_split_ref, vi_merge_ref, adapted_ri_ref)
    print "... passed"

    # Eval differences with same parameters and according regression thresholds
    # vi-split: 0.332745302066 => 0.4
    vi_split_ref = 0.4
    # vi-merge: 0.332349723508 => 0.4
    vi_merge_ref = 0.4
    # adapted-ri: 0.0942531472586 => 0.12
    adapted_ri_ref = 0.12
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'gt_test.h5'), 'data'),
        lmc_seg,
        vi_split_ref, vi_merge_ref, adapted_ri_ref)
    print "... passed"
def merge_blocks(ovlp_ids, tmp_folder, offsets, ovlp_threshold):
    id_a, id_b = ovlp_ids
    path_a = os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_a, id_b))
    path_b = os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_b, id_a))
    ovlp_a = vigra.readHDF5(path_a, 'data')
    ovlp_b = vigra.readHDF5(path_b, 'data')
    offset_a, offset_b = offsets[id_a], offsets[id_b]
    assert ovlp_a.shape == ovlp_b.shape, "%s, %s" % (str(ovlp_a.shape), str(ovlp_b.shape))

    # need additional attributes to determine the actual overlap
    with h5py.File(path_a, 'r') as f:
        attrs = f['data'].attrs
        # we should maybe sanity check that these agree for block b
        ovlp_dim = attrs['overlap_dimension']
        ovlp_begin = attrs['overlap_begin']
        ovlp_end = attrs['overlap_end']

    # find the ids ON the actual block boundary
    ovlp_len = ovlp_a.shape[ovlp_dim]
    ovlp_dim_begin = ovlp_len // 2 if ovlp_len % 2 == 1 else ovlp_len // 2 - 1
    ovlp_dim_end = ovlp_len // 2 + 1
    boundary = tuple(slice(ovlp_begin[i], ovlp_end[i]) if i != ovlp_dim else
                     slice(ovlp_dim_begin, ovlp_dim_end) for i in range(3))

    # measure all overlaps
    overlaps_ab = ngt.overlap(ovlp_a, ovlp_b)
    overlaps_ba = ngt.overlap(ovlp_b, ovlp_a)
    node_assignment = []

    # find the ids ON the actual block boundary
    segments_a = np.unique(ovlp_a[boundary])
    segments_b = np.unique(ovlp_b[boundary])

    for seg_a in segments_a:
        # skip ignore label
        if seg_a == 0:
            continue
        ovlp_seg_a, counts_seg_a = overlaps_ab.overlapArraysNormalized(seg_a, sorted=True)
        seg_b = ovlp_seg_a[0]
        # skip ignore label
        if seg_b == 0:
            continue
        ovlp_seg_b, counts_seg_b = overlaps_ba.overlapArraysNormalized(seg_b, sorted=True)
        if ovlp_seg_b[0] != seg_a or seg_b not in segments_b:
            continue
        ovlp_measure = (counts_seg_a[0] + counts_seg_b[0]) / 2.
        if ovlp_measure > ovlp_threshold:
            node_assignment.append([seg_a + offset_a, seg_b + offset_b])

    if node_assignment:
        return np.array(node_assignment, dtype='uint64')
    else:
        return None
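
# Hedged usage sketch: merge_blocks is meant to be mapped over all pairs of
# adjacent blocks. The block pairs, temporary folder, label offsets and
# threshold below are placeholders, and concurrent.futures is only an
# assumption for this example.
if __name__ == "__main__":
    from concurrent import futures
    ovlp_pairs = [(0, 1), (1, 2)]
    offsets = {0: 0, 1: 10000, 2: 20000}
    with futures.ThreadPoolExecutor(max_workers=4) as tp:
        tasks = [tp.submit(merge_blocks, pair, './tmp', offsets, 0.9) for pair in ovlp_pairs]
        assignments = [t.result() for t in tasks]
    assignments = [a for a in assignments if a is not None]
    node_assignment = np.concatenate(assignments, axis=0) if assignments else None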
def regression_test_isbi(cache_folder, data_folder):
    # if the cache does not exist, create it
    if not os.path.exists(os.path.join(cache_folder, 'isbi_train')):
        meta = init(cache_folder, data_folder, 'isbi')
    else:
        meta = MetaSet(cache_folder)
        meta.load()

    # isbi params
    params = ExperimentSettings()
    params.rf_cache_folder = os.path.join(cache_folder, "rf_cache")
    params.use_2d = True
    params.anisotropy_factor = 25.
    params.learn_2d = True
    params.ignore_mask = False
    params.n_trees = 500
    params.weighting_scheme = "z"
    params.solver = "multicut_fusionmoves"

    local_feats_list = ("raw", "prob", "reg", "topo")
    lifted_feats_list = ("mc", "cluster", "reg")

    ds_train = meta.get_dataset('isbi_train')
    ds_test = meta.get_dataset('isbi_test')

    mc_seg = run_mc(ds_train, ds_test, local_feats_list, params)
    lmc_seg = run_lmc(ds_train, ds_test, local_feats_list, lifted_feats_list, params, 2.)

    #vigra.writeHDF5(mc_seg, './cache_isbi/isbi_test/mc_seg.h5', 'data', compression = 'gzip')
    #vigra.writeHDF5(lmc_seg, './cache_isbi/isbi_test/lmc_seg.h5', 'data', compression = 'gzip')

    print "Regression Test MC..."
    # Eval differences with same parameters and according regression thresholds
    # vi-split: 0.0718660622942 -> 0.1
    vi_split_ref = 0.1
    # vi-merge: 0.0811051987574 -> 0.1
    vi_merge_ref = 0.1
    # adapted-ri: 0.0218391269081 -> 0.05
    adapted_ri_ref = 0.05
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'mc_seg.h5'), 'data'),
        mc_seg,
        vi_split_ref, vi_merge_ref, adapted_ri_ref)
    print "... passed"

    print "Regression Test LMC..."
    # Eval differences with same parameters and according regression thresholds
    # vi-split: 0.161923549092 -> 0.2
    vi_split_ref = 0.2
    # vi-merge: 0.0792288680404 -> 0.1
    vi_merge_ref = 0.1
    # adapted-ri: 0.0334914933439 -> 0.05
    adapted_ri_ref = 0.05
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'lmc_seg.h5'), 'data'),
        lmc_seg,
        vi_split_ref, vi_merge_ref, adapted_ri_ref)
    print "... passed"
def view_res(res):
    from volumina_viewer import volumina_n_layer
    raw = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/neuroproof_data/raw_test.h5",
        "data")
    res = vigra.readHDF5(res, "data").astype(np.uint32)
    gt = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/neuroproof_data/gt_test.h5",
        "data").astype(np.uint32)
    volumina_n_layer([raw, res, gt])
def compare_caches_filters(filter_folder_1, filter_folder_2):
    filters = os.listdir(filter_folder_1)
    for ff in filters:
        ff1 = os.path.join(filter_folder_1, ff)
        ff2 = os.path.join(filter_folder_2, ff) + '00000'
        assert os.path.exists(ff2), ff2
        filt1 = vigra.readHDF5(ff1, 'data')
        filt2 = vigra.readHDF5(ff2, 'data')
        assert np.allclose(filt1, filt2), \
            "%s: %i / %i" % (ff, np.sum(np.isclose(filt1, filt2)), filt1.size)
def path_eval_on_sample(sample, half, defect_correct, project_folder, thresh_range):
    from evaluation import compute_path_error_rates

    print '\nEvaluating spl{}_z{}'.format(sample, half)
    print '--------------------'

    if defect_correct:
        defect_correct_str = '_defect_correct'
    else:
        defect_correct_str = ''

    # Load stuff
    source_folder = '/mnt/ssd/jhennies/neuraldata/cremi_2016/170606_resolve_false_merges/'
    # TODO: Change here
    experiment_folder = os.path.join(project_folder, 'spl{}_z{}/'.format(sample, half))
    meta_folder = os.path.join(project_folder, 'cache/')

    test_name = 'spl{}_z{}'.format(sample, half)

    path_data_path = os.path.join(meta_folder, 'spl{}_z{}/path_data'.format(sample, half))
    path_data_filepath = os.path.join(path_data_path, 'paths_ds_{}.h5'.format(test_name))

    # TODO Change here when switching sample
    gt_file = os.path.join(
        source_folder,
        'cremi.spl{}.train.raw_neurons{}.crop.axes_xyz.split_z.h5').format(sample, defect_correct_str)
    # TODO Change here when switching half
    gt_key = 'z/{}/neuron_ids'.format(half)
    gt = vigra.readHDF5(gt_file, gt_key)

    # Load paths
    paths = vigra.readHDF5(path_data_filepath, 'all_paths')
    if paths.size:
        paths = np.array([path.reshape((len(path) / 3, 3)) for path in paths])
    paths_to_objs = vigra.readHDF5(path_data_filepath, 'paths_to_objs')
    with open(os.path.join(path_data_path, 'false_paths_predictions.pkl')) as f:
        false_merge_probs = pickle.load(f)

    print 'Number of paths = {}'.format(len(paths_to_objs))
    print 'Number of objects = {}'.format(len(np.unique(paths_to_objs)))

    # Determine path error rates
    result_path, result_obj = compute_path_error_rates(
        paths_to_objs, paths, gt, false_merge_probs, thresh_range=thresh_range)

    return result_path, result_obj
def make_superpix_isbi2013(superpix=True):
    path_probs = "/home/constantin/Work/data_ssd/data_150615/isbi2013/pixel_probs/test-probs-nn.h5"
    key_probs = "exported_data"
    path_raw = "/home/constantin/Work/data_ssd/data_150615/isbi2013/test-input.h5"
    key_raw = "data"

    probs = vigra.readHDF5(path_probs, key_probs)
    probs = np.squeeze(probs)
    probs = np.array(probs)
    probs = 1. - probs

    raw = vigra.readHDF5(path_raw, key_raw)

    #volumina_n_layer( (raw, probs) )
    #quit()

    if superpix:
        # use superpixel algorithm to segment the image
        # stack 2d segmented images
        segmentation = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)
        seeds = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)
        weights = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)

        # need offset to keep superpixel of the individual layers separate!
        offset = 0
        for layer in range(probs.shape[2]):
            if layer != 0:
                offset = np.max(segmentation[:, :, layer - 1])
            #segmentation[:,:,layer] = watershed_superpixel_vigra(probs[:,:,layer], offset)
            res_wsdt = watershed_distancetransform_2d(probs[:, :, layer], offset)
            segmentation[:, :, layer] = res_wsdt[0]
            seeds[:, :, layer] = res_wsdt[1]
            weights[:, :, layer] = res_wsdt[2]

        #segmentation[:,:,2] = watershed_distancetransform_2d( probs[:,:,2], 0 )
        volumina_n_layer((probs, segmentation, seeds, weights))
    else:
        # use supervoxel algorithm to segment the image
        segmentation = watershed_distancetransform_3d(probs)
        volumina_n_layer((raw, probs, segmentation))

    print "Number of superpixels:", segmentation.max()
    #quit()

    path = "/home/constantin/Work/data_ssd/data_150615/isbi2013/superpixel/"
    name = "watershed_nn_dt_supervox_test"
    fpath = path + name + ".h5"
    vigra.impex.writeHDF5(segmentation, fpath, "superpixel")
def view_isbi():
    raw = vigra.readHDF5('./cache_isbi/isbi_test/inp0.h5', 'data')
    pmap = vigra.readHDF5('./cache_isbi/isbi_test/inp1.h5', 'data')
    seg = vigra.readHDF5('./cache_isbi/isbi_test/seg0.h5', 'data')
    seg_mc = vigra.readHDF5('./cache_isbi/isbi_test/mc_seg.h5', 'data')
    seg_ref_mc = vigra.readHDF5('./data/isbi/mc_seg.h5', 'data')
    #seg_lmc = vigra.readHDF5('./cache_isbi/isbi_test/lmc_seg.h5', 'data')
    #seg_ref_lmc = vigra.readHDF5('./data/isbi/lmc_seg.h5', 'data')
    volumina_n_layer([raw, pmap, seg, seg_mc, seg_ref_mc],
                     ['raw', 'pmap', 'seg', 'seg_mc', 'seg_ref_mc'])
def get_source(ds_str="pedunculus"):
    assert ds_str in ("pedunculus",), ds_str  # TODO more datasets!!!
    print "Loading Features and Labels for:", ds_str

    labelpath = '/home/constantin/Work/data_hdd/cache/cached_datasets/pedunculus/gt_face_segid1.h5'
    ffeatpath = '/home/constantin/Work/data_hdd/cache/cached_datasets/pedunculus/features/ffeats/ffeat_bert_1_True.h5'

    feats = np.nan_to_num(vigra.readHDF5(ffeatpath, 'data'))
    labels = np.squeeze(vigra.readHDF5(labelpath, 'gt_face'))
    assert feats.shape[0] == labels.shape[0]
    return (feats, labels)
def read(self):
    labelsPath = vigra.readHDF5(self.path, "labelsPath")
    labelsKey = vigra.readHDF5(self.path, "labelsKey")
    with h5py.File(self.path) as f:
        dtype = f.attrs['dtype']
    if PipelineParameter().useN5Backend:
        labels = nz5.datasetWrapper(dtype, os.path.join(labelsPath, labelsKey))
    else:
        h5_file = nh5.openFile(labelsPath)
        labels = nh5.Hdf5Array(dtype, h5_file, labelsKey)
    nNodes = vigra.readHDF5(self.path, "numberOfNodes")
    return nrag.readStackedRagFromHdf5(labels, nNodes, self.path)
def gt_isbi2012():
    labels_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/train-labels.h5"
    raw_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/train-volume.h5"

    labels = vigra.readHDF5(labels_path, "labels")
    raw = vigra.readHDF5(raw_path, "data")

    labels = preprocess_for_bgsmoothing_isbi2012(labels)
    gt = smooth_background(labels).astype(np.uint32)

    volumina_n_layer((raw, labels, gt))

    gt_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/groundtruth/ground_truth_seg.h5"
def regression_test_snemi(cache_folder, data_folder):
    # if the cache does not exist, create it
    if not os.path.exists(os.path.join(cache_folder, 'snemi_train')):
        meta = init(cache_folder, data_folder, 'snemi')
    else:
        meta = MetaSet(cache_folder)
        meta.load()

    # isbi params
    params = ExperimentSettings()
    params.rf_cache_folder = os.path.join(cache_folder, "rf_cache")
    params.use_2d = True
    params.learn_fuzzy = True
    params.anisotropy_factor = 5.
    params.ignore_mask = False
    params.n_trees = 500
    params.weighting_scheme = "all"
    params.solver = "multicut_exact"
    params.lifted_neighborhood = 3

    local_feats_list = ("raw", "prob", "reg", "topo")
    lifted_feats_list = ("cluster", "reg")

    ds_train = meta.get_dataset('snemi_train')
    ds_test = meta.get_dataset('snemi_test')

    mc_seg = run_mc(ds_train, ds_test, local_feats_list, params)
    gamma = 10000.
    lmc_seg = run_lmc(ds_train, ds_test, local_feats_list, lifted_feats_list, params, gamma)

    print "Regression Test MC..."
    # Eval differences with same parameters and according regression thresholds
    # vi-split: 0.0501385345177 -> 0.1
    vi_split_ref = 0.1
    # vi-merge: 0.049803253098 -> 0.1
    vi_merge_ref = 0.1
    # adapted-ri: 0.0170138077554 -> 0.05
    adapted_ri_ref = 0.05
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'mc_seg.h5'), 'data'),
        mc_seg,
        vi_split_ref, vi_merge_ref, adapted_ri_ref)
    print "... passed"

    print "Regression Test LMC..."
    # FIXME why are these differences so big?
    # vi-split: 0.291149212478  vi-merge: 0.141228313621  adapted-ri: 0.0536859650649
    regression_test(
        vigra.readHDF5(os.path.join(data_folder, 'lmc_seg.h5'), 'data'),
        lmc_seg)
    print "... passed"
def view_test():
    raw = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/snemi3d_data/raw/test-input.h5",
        "data")
    icv1 = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/snemi3d_data/probabilities/pmaps_icv1_test.h5",
        "data")
    ciresan = vigra.readHDF5(
        "/home/constantin/Work/neurodata_hdd/snemi3d_data/probabilities/pmaps_ciresan_test.h5",
        "data")
    volumina_n_layer([raw, icv1, ciresan], ["raw", "pmap-icv1", "pmap-ciresan"])
def merge_blocks(ovlp_ids):
    id_a, id_b = ovlp_ids
    path_a = os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_a, id_b))
    path_b = os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_b, id_a))
    ovlp_a = vigra.readHDF5(path_a, 'data')
    ovlp_b = vigra.readHDF5(path_b, 'data')
    offset_a, offset_b = offsets[id_a], offsets[id_b]
    ovlp_a += offset_a
    ovlp_b += offset_b

    with h5py.File(path_a) as f:
        coords_a = f['data'].attrs['coords']
    with h5py.File(path_b) as f:
        coords_b = f['data'].attrs['coords']

    if ovlp_a.shape != ovlp_b.shape:
        print(coords_a)
        print(coords_b)
    assert ovlp_a.shape == ovlp_b.shape, "%s, %s" % (str(ovlp_a.shape), str(ovlp_b.shape))

    # bb_a = tuple(slice(c_a[0], c_a[1]) for c_a in coords_a)
    # bb_b = tuple(slice(c_b[0], c_b[1]) for c_b in coords_b)
    # affs_a = ds_xy[bb_a]
    # affs_b = ds_xy[bb_b]
    # view([affs_a, ovlp_a, affs_b, ovlp_b], ['affs_a', 'seg_b', 'affs_b', 'seg_b'])
    # quit()

    # measure all overlaps
    segments_a = np.unique(ovlp_a)
    overlaps_ab = ngt.overlap(ovlp_a, ovlp_b)
    overlaps_ba = ngt.overlap(ovlp_b, ovlp_a)
    node_assignment = []
    for seg_a in segments_a:
        ovlp_seg_a, counts_seg_a = overlaps_ab.overlapArraysNormalized(seg_a, sorted=True)
        seg_b = ovlp_seg_a[0]
        ovlp_seg_b, counts_seg_b = overlaps_ba.overlapArraysNormalized(seg_b, sorted=True)
        if ovlp_seg_b[0] != seg_a:
            continue
        ovlp_measure = (counts_seg_a[0] + counts_seg_b[0]) / 2.
        if ovlp_measure > ovlp_threshold:
            node_assignment.append([seg_a, seg_b])

    if node_assignment:
        return np.array(node_assignment, dtype='uint64')
    else:
        return None
def singleFunctionTest(feature_function, name):

    def singleFeatureTest(fu, typ, zDir):
        xname = 'feats_%s_%s_%i_xy.h5' % (name, typ, zDir)
        zname = 'feats_%s_%s_%i_z.h5' % (name, typ, zDir)
        xy_file = nh5.createFile(xname)
        z_file = nh5.createFile(zname)
        xy_shape = [
            rag.totalNumberOfInSliceEdges if typ in ('xy', 'both') else 1,
            9 if name == 'standard' else 9 * 12
        ]
        xy_chunks = [min(2500, xy_shape[0]), xy_shape[1]]
        z_shape = [
            rag.totalNumberOfInBetweenSliceEdges if typ in ('z', 'both') else 1,
            9 if name == 'standard' else 9 * 12
        ]
        z_chunks = [min(2500, z_shape[0]), z_shape[1]]
        xy_array = nh5.hdf5Array('float32', xy_file, 'data', xy_shape, xy_chunks)
        z_array = nh5.hdf5Array('float32', z_file, 'data', z_shape, z_chunks)
        fu(rag, self.dataArray, xy_array, z_array, zDirection=zDir)
        xfeats = xy_array.readSubarray([0, 0], xy_shape)
        zfeats = z_array.readSubarray([0, 0], z_shape)
        nh5.closeFile(xy_file)
        nh5.closeFile(z_file)
        os.remove(xname)
        os.remove(zname)
        return xname, zname, xfeats, zfeats

    for typ in ('both', 'xy', 'z'):
        if typ == 'both':
            new_fu = partial(feature_function, keepXYOnly=False, keepZOnly=False)
        elif typ == 'xy':
            new_fu = partial(feature_function, keepXYOnly=True, keepZOnly=False)
        elif typ == 'z':
            new_fu = partial(feature_function, keepXYOnly=False, keepZOnly=True)

        if typ == 'z':
            for zDir in (0, 1, 2):
                _, zname, _, zfeats = singleFeatureTest(new_fu, typ, zDir)
                ref_feats = vigra.readHDF5(os.path.join('./features', zname), 'data')
                self.assertTrue(numpy.allclose(zfeats, ref_feats))
        else:
            zDir = 0
            xname, zname, xfeats, zfeats = singleFeatureTest(new_fu, typ, zDir)
            ref_feats_xy = vigra.readHDF5(os.path.join('./features', xname), 'data')
            self.assertTrue(numpy.allclose(xfeats, ref_feats_xy))
            if typ == 'both':
                ref_feats_z = vigra.readHDF5(os.path.join('./features', zname), 'data')
                self.assertTrue(numpy.allclose(zfeats, ref_feats_z))
def compare_all_segmentations():
    aff_path = '/home/papec/mnt/papec/sampleB+_affs_cut.h5'
    print("Loading affinities")
    affs = 1. - vigra.readHDF5(aff_path, 'data')
    print(affs.dtype, affs.min(), affs.max())

    print("Computing watershed")
    lrws = cseg.LRAffinityWatershed(threshold_cc=0.1, threshold_dt=0.2, sigma_seeds=2.)
    ws, n_labels = lrws(affs)

    print("Computing RAG")
    rag = nrag.gridRag(ws, numberOfLabels=n_labels + 1)
    offsets = [[-1, 0, 0], [0, -1, 0], [0, 0, -1],
               [-2, 0, 0], [0, -3, 0], [0, 0, -3],
               [-3, 0, 0], [0, -9, 0], [0, 0, -9],
               [-4, 0, 0], [0, -27, 0], [0, 0, -27]]

    print("computing features")
    lifted_uvs, local_features, lifted_features = full_features(rag, affs, offsets)
    # nearest_probs = nearest_features(rag, affs)
    local_probs = local_features[:, 0]
    lifted_probs = lifted_features[:, 0]

    # load random forests
    rf_folder = '/home/papec/mnt/papec/Work/neurodata_hdd/cremi_warped/random_forests'
    with open(os.path.join(rf_folder, 'rf_ABC_local_affinity_feats.pkl'), 'rb') as f:
        rf1 = pickle.load(f)
    rf_local_probs = rf1.predict_proba(local_features)[:, 1]
    with open(os.path.join(rf_folder, 'rf_ABC_lifted_affinity_feats.pkl'), 'rb') as f:
        rf2 = pickle.load(f)
    rf_lifted_probs = rf2.predict_proba(lifted_features)[:, 1]

    print("computing multicuts")
    mc_local = mc(rag, local_probs)
    # mc_nearest = mc(rag, nearest_probs)
    mc_rf = mc(rag, rf_local_probs)

    print("computing lifted multicuts")
    lmc_local = lmc(rag, lifted_uvs, local_probs, lifted_probs)
    # lmc_nearest = lmc(rag, lifted_uvs, nearest_probs, lifted_probs)
    lmc_rf = lmc(rag, lifted_uvs, rf_local_probs, rf_lifted_probs)

    # print("Running MWS clustering")
    # mws_seg = mws_clustering(*lifted_problem)

    raw_path = '/home/papec/mnt/papec/sampleB+_raw_cut.h5'
    raw = vigra.readHDF5(raw_path, 'data')
    view([raw, ws, mc_local, mc_rf, lmc_local, lmc_rf],
         ['raw', 'ws', 'mc-local', 'mc-rf', 'lmc-local', 'lmc-rf'])
def load_large_neuro_data():
    """
    Load the large neuro dataset.

    :return: data_x, data_y
    """
    data_x = vigra.readHDF5("data/neuro/test/ffeat_br_segid0.h5", "ffeat_br")
    data_y = numpy.array(vigra.readHDF5("data/neuro/test/gt_face_segid0.h5", "gt_face")[:, 0])
    assert data_x.shape[0] == data_y.shape[0]

    # Remove NaN values.
    to_remove = numpy.where(numpy.isnan(data_x))
    data_x = numpy.delete(data_x, to_remove, axis=0)
    data_y = numpy.delete(data_y, to_remove)

    return data_x, data_y
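
# Hedged usage sketch: fit a random forest on the large neuro dataset.
# scikit-learn is an assumption here and is only imported for this example.
if __name__ == "__main__":
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    data_x, data_y = load_large_neuro_data()
    x_tr, x_te, y_tr, y_te = train_test_split(data_x, data_y, test_size=0.2)
    rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    rf.fit(x_tr, y_tr)
    print("hold-out accuracy: %.4f" % rf.score(x_te, y_te))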
def load_very_small_neuro_data():
    """
    Load the 1000 neuro dataset.

    :return: data_x, data_y
    """
    data_x = vigra.readHDF5("data/neuro/neuro_1000_raw_gt.h5", "raw")
    data_y = vigra.readHDF5("data/neuro/neuro_1000_raw_gt.h5", "gt")

    # Remove NaN values.
    to_remove = numpy.where(numpy.isnan(data_x))
    data_x = numpy.delete(data_x, to_remove, axis=0)
    data_y = numpy.delete(data_y, to_remove)

    return data_x, data_y
def view_res(sample):
    in_file = './cremi_inputs/%s/test_files.json' % sample
    with open(in_file) as f:
        inputs = json.load(f)
    raw = vigra.readHDF5(inputs['data'][0], 'data').astype('uint32')
    pmap = vigra.readHDF5(inputs['data'][1], 'data')
    seg = vigra.readHDF5(inputs['seg'], 'data')
    gt = vigra.readHDF5(inputs['gt'], 'data')
    mc_path = os.path.join(inputs['cache'], 'MulticutSegmentation.h5')
    assert os.path.exists(mc_path)
    mc = vigra.readHDF5(mc_path, 'data')
    volumina_n_layer([raw, pmap, seg, gt, mc],
                     ['raw', 'pmap', 'seg', 'gt', 'mc'])
def process_overlap(ovlp_ids):
    id_a, id_b = ovlp_ids
    ovlp_a = vigra.readHDF5(os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_a, id_b)), 'data')
    ovlp_b = vigra.readHDF5(os.path.join(tmp_folder, 'block_%i_%i.h5' % (id_b, id_a)), 'data')

    # match the non-zero ids
    labeled = ovlp_a != 0
    ids_a, ids_b = ovlp_a[labeled], ovlp_b[labeled]
    node_assignment = np.concatenate([ids_a[None], ids_b[None]], axis=0).transpose()
    if node_assignment.size:
        node_assignment = np.unique(node_assignment, axis=0)
    return node_assignment
def save_all_features_for_all_files(path_to_files, features_path=None, classes_path=None):
    """
    Computes the features for all sound files and, if requested, caches them together
    with the corresponding classes.

    :param path_to_files: folder with the sound files
    :param features_path: optional HDF5 file for caching the feature matrix
    :param classes_path: optional .npy file for caching the classes
    :return: 1. array with all features for all soundwaves and 2. all the according classes
    """
    if features_path is not None and classes_path is not None:
        if os.path.exists(features_path) and os.path.exists(classes_path):
            print("Features and classes exist, loading")
            features_of_all = readHDF5(features_path, "features")
            soundtypes = np.load(classes_path)
            return features_of_all, soundtypes

    soundwaves, soundtypes, actual_file_names = import_sounds(path_to_files)
    features_of_all = np.array([extract_features(np.float64(soundwave), samplingrate, soundtypes[idx_soundwave])
                                for idx_soundwave, (samplingrate, soundwave) in enumerate(soundwaves)])

    if features_path is not None and classes_path is not None:
        print("Saving features")
        writeHDF5(features_of_all, features_path, "features", compression="gzip")
        np.save(classes_path, soundtypes)

    return features_of_all, soundtypes
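
# Hedged usage sketch (all paths are placeholders): compute or load the cached
# feature matrix for a folder of sound files, keeping the classes alongside.
if __name__ == "__main__":
    feats, classes = save_all_features_for_all_files(
        "./sounds", features_path="./features.h5", classes_path="./classes.npy")
    print("feature matrix shape: %s, number of labels: %i" % (str(feats.shape), len(classes)))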
def extract_paths_from_segmentation(ds, seg_path, key, paths_cache_folder=None, anisotropy=[1, 1, 10]):
    """ extract paths from segmentation, for pipeline """
    if False:
        pass
    else:
        seg = vigra.readHDF5(seg_path, key)
        dt = ds.inp(ds.n_inp - 1)
        all_paths = []
        paths_to_objs = []

        # #creating distance transform of whole volume for border near paths
        # volume_expanded = np.ones((dt.shape[0]+2, dt.shape[1]+2, dt.shape[1]+2))
        # volume_expanded[1:-1, 1:-1, 1:-1] = 0
        # volume_dt = vigra.filters.distanceTransform(
        #     volume_expanded.astype("uint32"), background=True,
        #     pixel_pitch=[10, 1, 1])[1:-1, 1:-1, 1:-1]
        #
        # #threshhold for distance transform for picking terminal
        # #points near boundary
        # threshhold_boundary = 30
        # volume_where_threshhold = np.where(volume_dt > threshhold_boundary)
        # volume_dt_boundaries = np.s_[min(volume_where_threshhold[0]):max(volume_where_threshhold[0]),
        #                              min(volume_where_threshhold[1]):max(volume_where_threshhold[1]),
        #                              min(volume_where_threshhold[2]):max(volume_where_threshhold[2])]

        # for counting and debugging purposes
        len_uniq = len(np.unique(seg)) - 1
        centres_dict = compute_border_contacts_old(seg, dt)

        # parallelized path computation
        parallel_array = [parallel_wrapper(seg, dt, gt, anisotropy, key, len_uniq, centres_dict[key], "testing")
                          if len(centres_dict[key]) > 0
                          else parallel_wrapper(seg, dt, gt, anisotropy, key, len_uniq, [], "testing")
                          for key in centres_dict.keys()]

        [[all_paths.append(path) for path in seg_array[0] if seg_array != []]
         for seg_array in parallel_array]
        [[paths_to_objs.append(path_to_obj) for path_to_obj in seg_array[1] if seg_array != []]
         for seg_array in parallel_array]

        all_paths = np.array(all_paths)
        paths_to_objs = np.array(paths_to_objs, dtype="float64")

    return all_paths, paths_to_objs
def run(self):
    inp = self.input()
    seg = inp["seg"]
    seg.open(self.keyToSeg)

    # if we have defects, we need to skip the completely defected slices in the node extraction,
    # because nodes inside them are completely excluded from the graph now
    if PipelineParameter().defectPipeline:
        defect_slices = vigra.readHDF5(inp["defect_slices"].path, 'defect_slices').astype('int64').tolist()
        workflow_logger.info("NodesToBlocks: Skipping slices %s due to defects." % str(defect_slices))
    else:
        defect_slices = []

    blocking = nifty.tools.blocking(roiBegin=[0, 0, 0],
                                    roiEnd=seg.shape(self.keyToSeg),
                                    blockShape=self.blockShape)
    number_of_blocks = blocking.numberOfBlocks
    block_overlap = list(self.blockOverlap)

    n_workers = min(number_of_blocks, PipelineParameter().nThreads)
    # nWorkers = 1
    block_result = nifty.tools.nodesToBlocksStacked(seg.get(self.keyToSeg), blocking,
                                                    block_overlap, defect_slices, n_workers)

    block_result = [np.array(b_res, dtype=self.dtype) for b_res in block_result]
    self.output().writeVlen(block_result)
    seg.close()
def label_names(self):
    """Returns the names of the labels of the dataset.

    :return: names of the labels of the dataset
    :rtype: numpy.ndarray
    """
    return vigra.readHDF5(self.project_filename, const.label_names())
def make_superpix_isbi2012():
    path_probs = "/home/constantin/Work/data_ssd/data_090615/isbi2012/pixel_probabilities/probs_train_final.h5"
    #path_unet = "/home/constantin/Work/data_ssd/data_090615/isbi2012/u-net_probs/u-net_probs_test.h5"
    key_probs = "exported_data"

    probs = vigra.readHDF5(path_probs, key_probs)
    probs = np.squeeze(probs)
    #probs = 1. - probs

    # use superpixel algorithm to segment the image
    # stack 2d segmented images
    segmentation = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]))

    # need offset to keep superpixel of the individual layers separate!
    offset = 0
    for layer in range(probs.shape[2]):
        if layer != 0:
            offset = np.max(segmentation[:, :, layer - 1])
        res_wsdt = watershed_distancetransform_2d(probs[:, :, layer], offset)
        segmentation[:, :, layer] = res_wsdt[0]

    #volumina_double_layer(probs, segmentation)
    #quit()

    path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/superpixel/"
    name = "watershed_dt_train"

    fpath = path + name + ".h5"
    vigra.impex.writeHDF5(segmentation, fpath, "superpixel")
def project_gt_isbi2012():
    labels_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/train-labels.h5"
    gt_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/groundtruth/gt_mc.h5"
    raw_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/train-volume.h5"

    labels = vigra.readHDF5(labels_path, "labels")
    gt = vigra.readHDF5(gt_path, "gt")
    raw = vigra.readHDF5(raw_path, "data")

    gt = project_gt(labels, gt)

    save_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/groundtruth/gt_mc_bkg.h5"

    volumina_n_layer((raw, gt, labels))

    vigra.writeHDF5(gt, save_path, "gt")
def make_superpix_sopnetcomparison():
    path_probs = "/home/constantin/Work/data_ssd/data_110915/sopnet_comparison/pixel_probabilities/probs-final_autocontext.h5"
    key_probs = "data"

    probs = vigra.readHDF5(path_probs, key_probs)

    segmentation = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)
    seeds = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)
    weights = np.zeros((probs.shape[0], probs.shape[1], probs.shape[2]), dtype=np.uint32)

    # need offset to keep superpixel of the individual layers separate!
    offset = 0
    for layer in range(probs.shape[2]):
        if layer != 0:
            offset = np.max(segmentation[:, :, layer - 1])
        #segmentation[:,:,layer] = watershed_superpixel_vigra(probs[:,:,layer], offset)
        res_wsdt = watershed_distancetransform_2d(probs[:, :, layer], offset)
        segmentation[:, :, layer] = res_wsdt[0]
        seeds[:, :, layer] = res_wsdt[1]
        weights[:, :, layer] = res_wsdt[2]

    #segmentation[:,:,2] = watershed_distancetransform_2d( probs[:,:,2], 0 )
    print "Number of superpixels:", segmentation.max()

    path = "/home/constantin/Work/data_ssd/data_110915/sopnet_comparison/superpixel/"
    name = "watershed_dt_mitooff"
    fpath = path + name + ".h5"
    vigra.impex.writeHDF5(segmentation, fpath, "superpixel")
def make_superpix_from_intepolation(prob_path, prob_key, save_path, anisotropy):
    from wsDtSegmentation import wsDtSegmentation

    pmem = vigra.readHDF5(prob_path, prob_key)

    print pmem.shape
    print anisotropy

    # for some datasets, we have to invert the probabilities
    #probs = 1. - probs

    # interpolate the probability in z - direction
    print "doing spline interpolation"
    pmem_interpol = vigra.sampling.resize(pmem, shape=(pmem.shape[0], pmem.shape[1], anisotropy * pmem.shape[2]))
    pmem_interpol = np.array(pmem_interpol)
    print "Finished interpolation"

    superpix = wsDtSegmentation(pmem_interpol, 0.45, 20, 100, 1.6, 2.)[0]

    superpix = superpix[:, :, ::anisotropy]
    #volumina_n_layer( [pmem, superpix.astype(np.uint32)] )

    assert superpix.shape == pmem.shape
    vigra.writeHDF5(superpix, save_path, "superpixel")
def compare_rags_from_files(labels_file, labels_key):

    with vigra.Timer("Chunked Nifty Rag"):
        rag_c = chunked_rag(labels_file, labels_key, numberOfThreads=1)
    edges_c = rag_c.numberOfEdges
    print edges_c
    nodes_c = rag_c.numberOfNodes
    del rag_c

    labels = vigra.readHDF5(labels_file, labels_key).astype('uint32')

    with vigra.Timer("Nifty Rag"):
        rag_n = normal_rag(labels, numberOfThreads=-1)
    edges_n = rag_n.numberOfEdges
    nodes_n = rag_n.numberOfNodes

    with vigra.Timer("Vigra Rag"):
        rag_v = vigra.graphs.regionAdjacencyGraph(vigra.graphs.gridGraph(labels.shape), labels)
    nodes_v = rag_v.nodeNum
    edges_v = rag_v.edgeNum

    assert nodes_c == nodes_n, str(nodes_c) + " , " + str(nodes_n)
    #assert nodes_v == nodes_n, str(nodes_v) + " , " + str(nodes_n)
    assert edges_c == edges_n, str(edges_c) + " , " + str(edges_n)
    assert edges_v == edges_n, str(edges_v) + " , " + str(edges_n)
    print "Checks out"
def get_output_data(self, data_nr):
    """Returns the dataset that was produced by ilastik.

    :param data_nr: number of dataset
    :return: output dataset of ilastik
    """
    return vigra.readHDF5(self._get_output_data_path(data_nr), const.default_export_key())
def get_train_data(path):
    label_to_num, num_to_label = get_label_dictionary(path)
    data = []
    labels = []
    keys = label_to_num.keys()
    for key in keys:
        data_i = vigra.readHDF5(path, key)
        data_i = np.array([x.flatten() for x in data_i])
        label = label_to_num[key]
        labels_i = label * np.ones(data_i.shape[0])
        data.append(data_i)
        labels.append(labels_i)

    # shuffle all the data
    data = np.concatenate(data)
    labels = np.concatenate(labels)
    assert len(data.shape) == 2
    assert len(labels.shape) == 1
    assert labels.shape[0] == data.shape[0]
    p = np.random.permutation(data.shape[0])
    data = data[p]
    labels = labels[p]
    return data, labels
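
# Hedged usage sketch (the HDF5 path is a placeholder): the shuffled data and
# labels returned by get_train_data can be fed directly to a classifier.
if __name__ == "__main__":
    data, labels = get_train_data("./train_data.h5")
    print("training set: %i samples, %i features, %i classes"
          % (data.shape[0], data.shape[1], len(np.unique(labels))))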
def load_feats_and_gt_pedunculus():
    #raw_data = vigra.readHDF5(
    #    "/home/constantin/Work/data_ssd/data_080515/pedunculus/150401pedunculus_middle_512x512_first30_sliced.h5",
    #    "data"
    #)

    gt = vigra.readVolume(
        "/home/constantin/Work/data_ssd/data_080515/pedunculus/150401_pedunculus_membrane_labeling.tif")
    gt = np.squeeze(gt)

    # delete black slice
    gt = np.delete(gt, 6, axis=2)

    gt[gt == 0.] = 1
    gt[gt == 255.] = 0
    gt = gt.astype(np.uint32)

    save_path = "/home/constantin/Work/data_ssd/data_080515/pedunculus/features"
    #compute_ilastik_2dfeatures(raw_data, save_path)

    feats_path = os.path.join(save_path, "all_features.h5")
    # make sure that features are computed!
    #feats = load_precomputed_feats(save_path, raw_data.shape)
    #vigra.writeHDF5(feats, feats_path, "data")

    feats = vigra.readHDF5(feats_path, "data")

    return (feats, gt)
def crossvalidation_mnist():
    dat_train = vigra.readHDF5("../data/mnist/mnist-train.h5", "data")
    dat_train = dat_train.reshape((dat_train.shape[0], dat_train.shape[1] * dat_train.shape[2]))
    lbl_train = vigra.readHDF5("../data/mnist/mnist-train.h5", "label")

    dat_test = vigra.readHDF5("../data/mnist/mnist-test.h5", "data")
    dat_test = dat_test.reshape((dat_test.shape[0], dat_test.shape[1] * dat_test.shape[2]))
    lbl_test = vigra.readHDF5("../data/mnist/mnist-test.h5", "label")

    min_error = 1.
    best_params = (0, 0)

    out = open('crossvalidation_mnist.txt', 'w')
    out.write("Estimators, min_samples, test_error, train_time, test_time")
    out.write('\n')

    for estimators in (100, 255, 500, 750):
        for min_samples in (1, 5, 10, 20):
            print "Start run with n_estimators =", estimators, "min_samples =", min_samples
            # 5 runs to account for randomness
            errors = []
            times_train = []
            times_test = []
            for _ in range(5):
                t_0 = t.time()
                rf = learn_rf(dat_train, lbl_train, estimators, min_samples)
                times_train.append(t.time() - t_0)
                t_1 = t.time()
                errors.append(evaluate_rf(rf, dat_test, lbl_test))
                times_test.append(t.time() - t_1)
            err = np.mean(errors)
            t_train = np.mean(times_train)
            t_test = np.mean(times_test)
            res = str(estimators) + '\t' + str(min_samples) + '\t ' + str(err) + '\t' + str(t_train) + '\t ' + str(t_test) + '\n'
            out.write(res)
            if err < min_error:
                min_error = err
                best_params = (estimators, min_samples)

    print "Cross Validation found best test_error:", min_error, "for", best_params
    return best_params
def get_axistags(self, data_nr):
    """Returns the axistags of the dataset as they are in the project file.

    :param data_nr: number of dataset
    :return: axistags of dataset
    :rtype: str
    """
    return vigra.readHDF5(self.project_filename, const.axistags(data_nr))
def get_axisorder(self, data_nr):
    """Returns the axisorder of the dataset.

    :param data_nr: number of dataset
    :return: axisorder of dataset
    :rtype: str
    """
    return vigra.readHDF5(self.project_filename, const.axisorder(data_nr))
def train_mnist(estimators, min_samples):
    dat_train = vigra.readHDF5("../data/mnist/mnist-train.h5", "data")
    dat_train = dat_train.reshape((dat_train.shape[0], dat_train.shape[1] * dat_train.shape[2]))
    lbl_train = vigra.readHDF5("../data/mnist/mnist-train.h5", "label")

    dat_test = vigra.readHDF5("../data/mnist/mnist-test.h5", "data")
    dat_test = dat_test.reshape((dat_test.shape[0], dat_test.shape[1] * dat_test.shape[2]))
    lbl_test = vigra.readHDF5("../data/mnist/mnist-test.h5", "label")

    print "Training sklearn on MNIST"
    rf_mnist = learn_rf(dat_train, lbl_train, estimators, min_samples)
    print "Finished Training"

    err = evaluate_rf(rf_mnist, dat_test, lbl_test)
    print "Error on test set:", err

    save_path = '../data/rf/rf_mnist.pkl'
    print "Saving RF to", save_path
    with open(save_path, 'wb') as f:
        cPickle.dump(rf_mnist, f)
def get_data(self, data_nr):
    """Returns the dataset.

    :param data_nr: number of dataset
    :return: the dataset
    """
    if self._datatype(data_nr) == "hdf5" or self.is_internal(data_nr):
        return vigra.readHDF5(self.get_data_path(data_nr), self.get_data_key(data_nr))
    else:
        return vigra.readImage(self.get_data_path(data_nr))
def get_dataset_id(self, data_nr):
    """Returns the ilp dataset id.

    :param data_nr: number of dataset
    :return: dataset id
    :rtype: str
    """
    h5_key = const.datasetid(data_nr)
    dataset_id = vigra.readHDF5(self.project_filename, h5_key)
    return dataset_id
def get_data_location(self, data_nr):
    """Returns the data location (either "ProjectInternal" or "FileSystem").

    :param data_nr: number of dataset
    :return: data location
    :rtype: str
    """
    h5_key = const.datalocation(data_nr)
    data_location = vigra.readHDF5(self.project_filename, h5_key)
    return data_location
def project_gt_pedunculus():
    labels_path = "/home/constantin/Work/data_ssd/data_080515/pedunculus/150401_pedunculus_membrane_labeling.tif"
    gt_path = "/home/constantin/Work/data_ssd/data_080515/pedunculus/gt_mc.h5"
    raw_path = "/home/constantin/Work/data_ssd/data_080515/pedunculus/150401pedunculus_middle_512x512_first30_sliced.h5"

    labels = vigra.readVolume(labels_path)
    labels = np.squeeze(labels)
    labels = np.delete(labels, 6, axis=2)

    gt = vigra.readHDF5(gt_path, "gt")
    raw = vigra.readHDF5(raw_path, "data")

    gt = project_gt(labels, gt)

    save_path = "/home/constantin/Work/data_ssd/data_080515/pedunculus/gt_mc_bkg.h5"

    volumina_n_layer((raw, gt, labels))

    vigra.writeHDF5(gt, save_path, "gt")
def compute_features_isbi2012():
    raw_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/train-volume.h5"
    raw_key = "data"
    raw = vigra.readHDF5(raw_path, raw_key)

    sigmas = (0.3, 0.7, 1.0, 1.6, 3.5, 5.0)
    feature_path = "/home/constantin/Work/data_ssd/data_090615/isbi2012/features/train-"

    compute_ilastik_2dfeatures(raw, feature_path, sigmas)
def synapse_images(two_dim=True):
    in_old = "/home/akreshuk/data/connector_archive_2g0y0b/distance_tests/*2d_pred.h5"
    in_new = "/data/connector_archive_2g0y0b/test_data/stage_2_output/*.h5"
    outdir = "/data/connector_archive_2g0y0b/compare_membranes/"

    in_old_list = glob.glob(in_old)
    in_old_list = sorted(in_old_list, key=str.lower)
    in_new_list = glob.glob(in_new)
    in_new_list = sorted(in_new_list, key=str.lower)

    for old_name, new_name in zip(in_old_list, in_new_list):
        print old_name
        dnew = vigra.readHDF5(new_name, "exported_data")
        dold = vigra.readHDF5(old_name, "exported_data")
        print dnew.shape, dnew.dtype, numpy.min(dnew), numpy.max(dnew)
        print dold.shape, dold.dtype, numpy.min(dold), numpy.max(dold)

        # convert the old ones to uint8, 0-255
        dold = dold * 255
        dold = dold.astype(numpy.uint8)

        _, old_fname = os.path.split(old_name)
        parts = old_fname.split("_")
        dset_name = parts[0]
        if not os.path.exists(outdir + dset_name):
            os.makedirs(outdir + dset_name)
        if not os.path.exists(outdir + dset_name + "/old"):
            os.makedirs(outdir + dset_name + "/old")
        if not os.path.exists(outdir + dset_name + "/autocontext"):
            os.makedirs(outdir + dset_name + "/autocontext")

        if two_dim:
            vigra.impex.writeImage(dnew[:, :, 5, 1], outdir + dset_name + "/autocontext/membranes_z5.png")
            vigra.impex.writeImage(dold[:, :, 1], outdir + dset_name + "/old/membranes_z5.png")
        else:
            for z in range(dnew.shape[2]):
                vigra.impex.writeImage(dnew[:, :, z, 2], outdir + dset_name + "/autocontext/%.02d" % z + ".png")
                vigra.impex.writeImage(dold[:, :, z, 2], outdir + dset_name + "/old/%.02d" % z + ".png")
def malis_2d(aff_path):
    # import all the malis functionality we need
    from malis import mknhood2d, affgraph_to_edgelist, malis_loss_weights

    # load affinity graph ( only the zeroth slice and the x and y weights )
    aff = vigra.readHDF5(aff_path, "data")[:, :, 0, :2].transpose((2, 1, 0, 3))

    # make the 2d neighborhood
    nhood = mknhood2d()

    # get the node connectors and weights from the affinitygraph
    connectors1, connectors2, edge_weights = affgraph_to_edgelist(aff, nhood)
    print connectors1.shape, connectors2.shape, edge_weights.shape
def load_data(labels=None):
    """
    Load the data sets.

    :param labels: list with the labels that should be used
    :return: train_x, train_y, test_x, test_y
    """
    # Load the data.
    train_x = numpy.array(vigra.readHDF5("data/train.h5", "data").transpose())
    train_y = vigra.readHDF5("data/train.h5", "labels")
    test_x = numpy.array(vigra.readHDF5("data/test.h5", "data").transpose())
    test_y = vigra.readHDF5("data/test.h5", "labels")

    # Reduce the data to the given labels.
    if labels is not None:
        train_indices = numpy.array([i for i, t in enumerate(train_y) if t in labels])
        train_x = train_x[train_indices]
        train_y = train_y[train_indices]
        test_indices = numpy.array([i for i, t in enumerate(test_y) if t in labels])
        test_x = test_x[test_indices]
        test_y = test_y[test_indices]

    return train_x, train_y, test_x, test_y
def load_neuro_data():
    """
    Load the neuro dataset.

    :return: train_x, train_y, test_x, test_y
    """
    # Load the data.
    train_x = vigra.readHDF5("data/neuro/train/ffeat_br_segid0.h5", "ffeat_br")
    train_y = numpy.array(vigra.readHDF5("data/neuro/train/gt_face_segid0.h5", "gt_face")[:, 0])
    test_x = vigra.readHDF5("data/neuro/test/ffeat_br_segid0.h5", "ffeat_br")
    test_y = numpy.array(vigra.readHDF5("data/neuro/test/gt_face_segid0.h5", "gt_face")[:, 0])
    assert train_x.shape[0] == train_y.shape[0]
    assert test_x.shape[0] == test_y.shape[0]
    assert train_x.shape[1] == test_x.shape[1]

    # Remove NaN values.
    to_remove = numpy.where(numpy.isnan(train_x))
    train_x = numpy.delete(train_x, to_remove, axis=0)
    train_y = numpy.delete(train_y, to_remove)
    to_remove = numpy.where(numpy.isnan(test_x))
    test_x = numpy.delete(test_x, to_remove, axis=0)
    test_y = numpy.delete(test_y, to_remove)

    return train_x, train_y, test_x, test_y
def run(args):
    x = vigra.readHDF5(args.h5file, args.internal_path)
    t = x[:, 0] / float(24 * 60 * 60 * 1e6)
    data = vigra.taggedView(x[:, 1], axistags='t')

    op = OpExponentiallySegmentedPattern(graph=Graph())
    op.Input.setValue(data)
    op.BaselineSize.setValue(60)
    op.NumSegments.setValue(6)

    out = op.Output[...].wait()
    leg = ["mean cpu usage over {} hours".format(2**i) for i in range(out.shape[1])]

    plt.plot(t, out)
    plt.legend(leg)
    plt.show()
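
# Hedged usage sketch (not part of the original script): wire up the command
# line arguments that run() accesses (args.h5file and args.internal_path).
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Plot exponentially segmented cpu usage.")
    parser.add_argument("h5file", help="input HDF5 file")
    parser.add_argument("internal_path", help="internal HDF5 dataset path")
    run(parser.parse_args())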