def _SaveEdgeExamples(prefix, segmentation, examples, width, network_radius, directory):
    """Write one category of edge examples to disk.

    Nothing is written when ``examples`` is empty. Otherwise two files are
    created inside ``directory``:

    * ``<prefix>.examples`` - a binary file holding the number of examples
      as one 'q' value followed by the first six entries of every example
      packed as 'qqqqqq'.
    * ``<prefix>-examples.h5`` - the array returned by GenerateExamplesArray
      for the examples (truncated to their first five entries), stored in
      the 'main' dataset with compression enabled.

    Args:
        prefix: dataset prefix used to name both output files.
        segmentation: label volume forwarded to GenerateExamplesArray.
        examples: list of edges, each with at least six entries.
        width: network input width forwarded to GenerateExamplesArray.
        network_radius: radius forwarded to GenerateExamplesArray.
        directory: existing directory that receives both files.
    """
    if not examples:
        return

    # save the raw list of examples
    examples_filename = '{}/{}.examples'.format(directory, prefix)
    with open(examples_filename, 'wb') as fd:
        fd.write(struct.pack('q', len(examples)))
        for example in examples:
            fd.write(struct.pack('qqqqqq', example[0], example[1], example[2],
                                 example[3], example[4], example[5]))

    # create new examples list to remove the last element
    truncated_examples = [example[0:5] for example in examples]

    examples_array = GenerateExamplesArray(prefix, segmentation, truncated_examples,
                                           width, network_radius)
    dataIO.WriteH5File(examples_array, '{}/{}-examples.h5'.format(directory, prefix),
                       'main', compression=True)


def GenerateEdges(prefix, segmentation, subset, seg2gold_mapping=None):
    """Generate candidate edges and save them grouped by label and location.

    Edges come from EndpointTraversal. Each edge is classified as

    * unknown  - either segment maps to a gold label below 1 (always the
      case when ``seg2gold_mapping`` is None),
    * positive - both segments map to the same gold label,
    * negative - the segments map to different gold labels.

    Edges whose location lies inside the box returned by
    dataIO.CroppingBox(prefix) are stored under ``subset``; edges outside
    that box are stored under 'forward' so that they remain available for
    forward inference only.

    Args:
        prefix: dataset prefix; used for the cropping box lookup, the edge
            generation and the output filenames.
        segmentation: label volume passed to EndpointTraversal and
            GenerateExamplesArray.
        subset: one of 'training', 'validation' or 'testing'.
        seg2gold_mapping: optional lookup from segment label to gold label.

    Raises:
        AssertionError: if ``subset`` is not one of the three categories.
    """
    # parameters from CVPR submission
    network_radius = 600
    maximum_distance = 500
    width = (18, 52, 52)

    # create the directory structure to save the features in
    # forward is needed for training and validation data that is cropped
    CreateDirectoryStructure(width, network_radius,
                             ['training', 'validation', 'testing', 'forward'], 'edges')

    # make sure the subset is one of three categories
    assert (subset == 'training' or subset == 'validation' or subset == 'testing')

    # crop the subset if it overlaps with testing data
    ((cropped_zmin, cropped_zmax),
     (cropped_ymin, cropped_ymax),
     (cropped_xmin, cropped_xmax)) = dataIO.CroppingBox(prefix)

    # call the function to actually generate the edges
    edges = EndpointTraversal(prefix, segmentation, maximum_distance)

    # one list per category, for the requested subset and for forward inference;
    # the tuple order is also the order in which the categories are saved
    categories = ('positives', 'negatives', 'unknowns')
    subset_examples = dict((category, []) for category in categories)
    forward_examples = dict((category, []) for category in categories)

    for edge in edges:
        zpoint, ypoint, xpoint = (edge[IB_Z], edge[IB_Y], edge[IB_X])
        label_one, label_two = edge[3], edge[4]

        # a point outside the cropped box is not used for training or
        # validation, but it is still allowed for forward inference
        forward = (zpoint < cropped_zmin or cropped_zmax <= zpoint or
                   ypoint < cropped_ymin or cropped_ymax <= ypoint or
                   xpoint < cropped_xmin or cropped_xmax <= xpoint)

        # see if these two segments belong to the same neuron
        if seg2gold_mapping is not None:
            gold_one = seg2gold_mapping[label_one]
            gold_two = seg2gold_mapping[label_two]
        else:
            gold_one = -1
            gold_two = -1

        if gold_one < 1 or gold_two < 1:
            category = 'unknowns'
        elif gold_one == gold_two:
            category = 'positives'
        else:
            category = 'negatives'

        if forward:
            forward_examples[category].append(edge)
        else:
            subset_examples[category].append(edge)

    # only the edges that fall inside the cropped box are reported
    print('No. Positive Edges: {}'.format(len(subset_examples['positives'])))
    print('No. Negative Edges: {}'.format(len(subset_examples['negatives'])))
    print('No. Unknown Edges: {}'.format(len(subset_examples['unknowns'])))

    parent_directory = 'features/biological/edges-{}nm-{}x{}x{}'.format(
        network_radius, width[IB_Z], width[IB_Y], width[IB_X])

    # save the examples for the requested subset
    for category in categories:
        _SaveEdgeExamples(prefix, segmentation, subset_examples[category], width,
                          network_radius,
                          '{}/{}/{}'.format(parent_directory, subset, category))

    # save the examples that are only usable for forward inference
    for category in categories:
        _SaveEdgeExamples(prefix, segmentation, forward_examples[category], width,
                          network_radius,
                          '{}/forward/{}'.format(parent_directory, category))
def CollapseGraph(prefix, segmentation, vertex_ones, vertex_twos, maintained_edges, algorithm, evaluate):
    """Merge the endpoints of every non-maintained edge and save the result.

    Edge ``i`` connects ``vertex_ones[i]`` and ``vertex_twos[i]``. When
    ``maintained_edges[i]`` is falsy the two vertices are joined in a union
    find structure. The resulting label mapping is handed to
    seg2seg.MapLabels and ``segmentation`` is written to
    'segmentations/<prefix>-<algorithm>.h5', followed by a new meta file.

    When ``evaluate`` is truthy the gold data for ``prefix`` is read, rand
    error and variation of information are computed, printed, and written to
    'results/<prefix>-<algorithm>.txt'.
    """
    # one union find element per possible label value
    label_count = np.amax(segmentation) + 1
    elements = [unionfind.UnionFindElement(label) for label in range(label_count)]

    # collapse every edge that is not maintained
    for index in range(maintained_edges.shape[0]):
        if maintained_edges[index]:
            continue

        first = vertex_ones[index]
        second = vertex_twos[index]
        unionfind.Union(elements[first], elements[second])

    # every label maps to the label of its union find representative
    mapping = np.zeros(label_count, dtype=np.int64)
    for label, element in enumerate(elements):
        mapping[label] = unionfind.Find(element).label

    # NOTE(review): the return value of MapLabels is discarded, so this relies
    # on MapLabels updating segmentation in place - confirm
    seg2seg.MapLabels(segmentation, mapping)

    segmentation_filename = 'segmentations/{}-{}.h5'.format(prefix, algorithm)
    dataIO.WriteH5File(segmentation, segmentation_filename, 'main')

    # spawn a new meta file
    dataIO.SpawnMetaFile(prefix, segmentation_filename, 'main')

    # the evaluation below is only possible if gold data exists
    if not evaluate:
        return

    # file name without the directory and without the '.h5' extension
    new_prefix = segmentation_filename.split('/')[1][:-3]

    gold = dataIO.ReadGoldData(prefix)
    rand_error, vi = comparestacks.VariationOfInformation(new_prefix, segmentation, gold)

    # the same lines are printed and written to the results file
    report = [
        'Rand Error Full: {}'.format(rand_error[0] + rand_error[1]),
        'Rand Error Merge: {}'.format(rand_error[0]),
        'Rand Error Split: {}'.format(rand_error[1]),
        'Variation of Information Full: {}'.format(vi[0] + vi[1]),
        'Variation of Information Merge: {}'.format(vi[0]),
        'Variation of Information Split: {}'.format(vi[1]),
    ]

    for line in report:
        print(line)

    if not os.path.exists('results'):
        os.mkdir('results')

    with open('results/{}-{}.txt'.format(prefix, algorithm), 'w') as fd:
        for line in report:
            fd.write(line + '\n')
def MergeGroundTruth(prefix, model_prefix):
    """Merge all graph edges that the gold data says belong together.

    Reads the segmentation and gold data for ``prefix`` together with the
    graph file 'multicut/<model_prefix>-<prefix>.graph'. Two labels joined by
    an edge are merged when both map to the same non-zero gold label. The
    merged segmentation is written to 'multicut/<prefix>-truth.h5' and the
    external comparestacks program is run against 'gold/<prefix>_gold.h5';
    the elapsed time of that run is printed.
    """
    import time

    # read the segmentation data
    segmentation = dataIO.ReadSegmentationData(prefix)

    # get the multicut filename (with graph weights)
    graph_filename = 'multicut/{}-{}.graph'.format(model_prefix, prefix)

    # read the gold data and the segmentation to gold mapping
    gold = dataIO.ReadGoldData(prefix)
    seg2gold_mapping = seg2gold.Mapping(segmentation, gold)

    # one union find element per possible segmentation value
    label_count = np.amax(segmentation) + 1
    elements = [UnionFind.UnionFindElement(label) for label in range(label_count)]

    with open(graph_filename, 'rb') as fd:
        # the header holds the number of vertices and the number of edges
        _, nedges = struct.unpack('QQ', fd.read(16))

        for _ in range(nedges):
            # every edge record starts with the two labels
            label_one, label_two = struct.unpack('QQ', fd.read(16))

            # skip over the reduced labels and edge weight
            fd.read(24)

            gold_one = seg2gold_mapping[label_one]
            gold_two = seg2gold_mapping[label_two]

            # merge if the gold labels are the same and non zero
            if gold_one == gold_two and gold_one:
                UnionFind.Union(elements[label_one], elements[label_two])

    # every label maps to the label of its union find representative
    relabeling = np.zeros(label_count, dtype=np.int64)
    for label, element in enumerate(elements):
        relabeling[label] = UnionFind.Find(element).label

    merged_segmentation = seg2seg.MapLabels(segmentation, relabeling)

    gold_filename = 'gold/{}_gold.h5'.format(prefix)

    # TODO fix this code temporary filename
    truth_filename = 'multicut/{}-truth.h5'.format(prefix)

    # temporary write h5 file
    dataIO.WriteH5File(merged_segmentation, truth_filename, 'stack')

    start_time = time.time()
    print('Ground truth: ')

    # create the command line and execute it
    command = '~/software/PixelPred2Seg/comparestacks --stack1 {} --stackbase {} --dilate1 1 --dilatebase 1 --relabel1 --relabelbase --filtersize 100 --anisotropic'.format(truth_filename, gold_filename)
    os.system(command)

    print(time.time() - start_time)
def Agglomerate(prefix, model_prefix, threshold=0.5):
    """Merge all graph edges whose weight exceeds ``threshold``.

    Reads the segmentation for ``prefix`` and the graph file
    'multicut/<model_prefix>-<prefix>.graph'. Two labels joined by an edge
    are merged when the edge weight is strictly greater than ``threshold``.
    The agglomerated segmentation is written to
    'multicut/<prefix>-agglomerate.h5' and the external comparestacks program
    is run against 'gold/<prefix>_gold.h5'; the elapsed time of that run is
    printed.
    """
    import time

    # read the segmentation data
    segmentation = dataIO.ReadSegmentationData(prefix)

    # get the multicut filename (with graph weights)
    graph_filename = 'multicut/{}-{}.graph'.format(model_prefix, prefix)

    # one union find element per possible segmentation value
    label_count = np.amax(segmentation) + 1
    elements = [UnionFind.UnionFindElement(label) for label in range(label_count)]

    with open(graph_filename, 'rb') as fd:
        # the header holds the number of vertices and the number of edges
        _, nedges = struct.unpack('QQ', fd.read(16))

        for _ in range(nedges):
            # every edge record starts with both labels
            label_one, label_two = struct.unpack('QQ', fd.read(16))

            # skip over the reduced labels
            fd.read(16)

            # the record ends with the edge weight
            edge_weight = struct.unpack('d', fd.read(8))[0]

            # merge label one and label two if the weight is large enough
            if edge_weight > threshold:
                UnionFind.Union(elements[label_one], elements[label_two])

    # every label maps to the label of its union find representative
    relabeling = np.zeros(label_count, dtype=np.int64)
    for label, element in enumerate(elements):
        relabeling[label] = UnionFind.Find(element).label

    # update the labels
    agglomerated_segmentation = seg2seg.MapLabels(segmentation, relabeling)

    gold_filename = 'gold/{}_gold.h5'.format(prefix)

    # TODO fix this code temporary filename
    agglomeration_filename = 'multicut/{}-agglomerate.h5'.format(prefix)

    # temporary - write h5 file
    dataIO.WriteH5File(agglomerated_segmentation, agglomeration_filename, 'stack')

    start_time = time.time()
    print('Agglomeration - {}:'.format(threshold))

    # create the command line and execute it
    command = '~/software/PixelPred2Seg/comparestacks --stack1 {} --stackbase {} --dilate1 1 --dilatebase 1 --relabel1 --relabelbase --filtersize 100 --anisotropic'.format(agglomeration_filename, gold_filename)
    os.system(command)

    print(time.time() - start_time)
def Forward(prefix, model_prefix, segmentation, subset, seg2gold_mapping=None, evaluate=False):
    """Run the trained model and merge small segments into large neighbors.

    The model stored at '<model_prefix>.json' / '<model_prefix>-best-loss.h5'
    predicts one probability per example. The probabilities are written to
    '<model_prefix>-<prefix>.probabilities'. Every small segment (as returned
    by FindSmallSegments) is then mapped onto the neighboring large segment
    with the highest probability, provided that probability is at least 0.5.
    The mapped segmentation has its labels reduced and is written to
    'segmentations/<prefix>-reduced-<model_name>.h5'; the mapping from the
    original labels to the final labels is written to
    'cache/<prefix>-reduced-<model_name>-end2end.map'.

    Args:
        prefix: dataset prefix used for lookups and output filenames.
        model_prefix: path prefix of the trained model. It is split on '/'
            and the second component is used as the model name, so it must
            contain at least one '/'.
        segmentation: label volume to process.
        subset: forwarded to CollectExamples and CollectLargeSmallPairs.
        seg2gold_mapping: optional lookup from segment label to gold label.
            When given, precision/recall and the number of correct and
            incorrect merges are written to
            '<model_prefix>-<prefix>-inference.txt'; labels mapping to a
            value below 1 are not counted.
        evaluate: when truthy, rand error and variation of information
            against the gold data are written to
            'results/<prefix>-reduced-<model_name>.txt'.

    Raises:
        AssertionError: if a pairing does not consist of exactly one small
            segment and one segment that is not small.
    """
    # optimal parameters from paper (width starts with 3 channels)
    width = (3, 20, 60, 60)
    radius = 400
    threshold_volume = 10368000

    # read in the trained model; the context manager closes the json file
    # instead of leaving the handle open
    with open('{}.json'.format(model_prefix), 'r') as fd:
        model = model_from_json(fd.read())
    model.load_weights('{}-best-loss.h5'.format(model_prefix))

    # get all of the examples
    examples, npositives, nnegatives = CollectExamples(prefix, width, radius, subset)

    # get all of the large-small pairings
    # NOTE(review): nothing checks that len(pairings) == examples.shape[0],
    # although the probabilities file header below stores examples.shape[0]
    pairings = CollectLargeSmallPairs(prefix, width, radius, subset)

    # get the threshold in terms of number of voxels
    resolution = dataIO.Resolution(prefix)
    threshold = int(threshold_volume /
                    (resolution[IB_Z] * resolution[IB_Y] * resolution[IB_X]))

    # get the list of nodes under the threshold
    small_segments, _ = FindSmallSegments(segmentation, threshold)

    # membership is tested for every pairing, so build the lookup set once
    small_segment_set = set(small_segments)

    # get all of the probabilities
    probabilities = model.predict_generator(NodeGenerator(examples, width),
                                            examples.shape[0], max_q_size=1000)

    # save the probabilities to a file
    probabilities_filename = '{}-{}.probabilities'.format(model_prefix, prefix)
    with open(probabilities_filename, 'wb') as fd:
        fd.write(struct.pack('q', examples.shape[0]))
        for ie, (label_one, label_two) in enumerate(pairings):
            # pack the scalar explicitly rather than relying on the implicit
            # conversion of a one-element array to a float
            fd.write(struct.pack('qqd', label_one, label_two,
                                 float(probabilities[ie][0])))

    # create the correct labels for the ground truth (positives come first)
    nlabeled = npositives + nnegatives
    ground_truth = np.zeros(nlabeled, dtype=bool)
    ground_truth[:npositives] = True

    # get the results with labeled data
    predictions = Prob2Pred(np.squeeze(probabilities[:nlabeled]))

    # write the confusion matrix
    inference_filename = None
    if seg2gold_mapping is not None:
        inference_filename = '{}-{}-inference.txt'.format(model_prefix, prefix)
        PrecisionAndRecall(ground_truth, predictions, inference_filename)

    # for every small segment collect (large segment, probability) candidates
    small_segment_predictions = dict()
    for small_segment in small_segments:
        small_segment_predictions[small_segment] = set()

    # go through each pairing
    for pairing, probability in zip(pairings, probabilities):
        label_one, label_two = pairing

        # make sure that either label one or two is small and the other is large
        one_is_small = label_one in small_segment_set
        two_is_small = label_two in small_segment_set
        assert (one_is_small ^ two_is_small)

        if one_is_small:
            small_segment, large_segment = label_one, label_two
        else:
            small_segment, large_segment = label_two, label_one

        small_segment_predictions[small_segment].add((large_segment, probability[0]))

    # begin to map the small labels, starting from the identity mapping
    max_label = np.amax(segmentation) + 1
    mapping = list(range(max_label))

    ncorrect_merges = 0
    nincorrect_merges = 0

    # go through all of the small segments
    for small_segment in small_segments:
        best_probability = -1
        best_large_segment = -1

        # go through all the neighboring large segments
        for large_segment, probability in small_segment_predictions[small_segment]:
            if probability > best_probability:
                best_probability = probability
                best_large_segment = large_segment

        # this should almost never happen but if it does just continue
        if best_large_segment == -1 or best_probability < 0.5:
            mapping[small_segment] = small_segment
            continue

        mapping[small_segment] = best_large_segment

        if seg2gold_mapping is None:
            continue

        # don't consider undetermined locations
        if seg2gold_mapping[small_segment] < 1 or seg2gold_mapping[best_large_segment] < 1:
            continue

        if seg2gold_mapping[small_segment] == seg2gold_mapping[best_large_segment]:
            ncorrect_merges += 1
        else:
            nincorrect_merges += 1

    if seg2gold_mapping is not None:
        print('\nResults:')
        print('  Correctly Merged: {}'.format(ncorrect_merges))
        print('  Incorrectly Merged: {}'.format(nincorrect_merges))

        # append to the file that already holds the precision and recall
        with open(inference_filename, 'a') as fd:
            fd.write('\nResults:\n')
            fd.write('  Correctly Merged: {}\n'.format(ncorrect_merges))
            fd.write('  Incorrectly Merged: {}\n'.format(nincorrect_merges))

    # save the node mapping for later
    end2end_mapping = list(mapping)

    # initiate the mapping to eliminate small segments
    # NOTE(review): the return value of MapLabels is discarded here, while
    # MergeGroundTruth and Agglomerate use it - confirm that MapLabels
    # updates segmentation in place
    seg2seg.MapLabels(segmentation, mapping)

    # reduce the labels and map again
    reduction, _ = seg2seg.ReduceLabels(segmentation)
    seg2seg.MapLabels(segmentation, reduction)

    # update the end to end mapping with the reduced labels
    end2end_mapping = [reduction[label] for label in end2end_mapping]

    # get the model name (second component of the model prefix)
    model_name = model_prefix.split('/')[1]

    segmentation_filename = 'segmentations/{}-reduced-{}.h5'.format(prefix, model_name)
    dataIO.WriteH5File(segmentation, segmentation_filename, 'main')

    # spawn a new meta file
    dataIO.SpawnMetaFile(prefix, segmentation_filename, 'main')

    # save the end to end mapping in the cache
    if not os.path.exists('cache'):
        os.mkdir('cache')

    mapping_filename = 'cache/{}-reduced-{}-end2end.map'.format(prefix, model_name)
    with open(mapping_filename, 'wb') as fd:
        fd.write(struct.pack('q', max_label))
        for label in range(max_label):
            fd.write(struct.pack('q', end2end_mapping[label]))

    if not evaluate:
        return

    gold = dataIO.ReadGoldData(prefix)

    # file name without the directory and without the extension
    new_prefix = segmentation_filename.split('/')[-1].split('.')[0]

    # run the evaluation framework
    rand_error, vi = comparestacks.VariationOfInformation(new_prefix, segmentation, gold)

    # write the output file
    if not os.path.exists('results'):
        os.mkdir('results')

    with open('results/{}-reduced-{}.txt'.format(prefix, model_name), 'w') as fd:
        fd.write('Rand Error Full: {}\n'.format(rand_error[0] + rand_error[1]))
        fd.write('Rand Error Merge: {}\n'.format(rand_error[0]))
        fd.write('Rand Error Split: {}\n'.format(rand_error[1]))
        fd.write('Variation of Information Full: {}\n'.format(vi[0] + vi[1]))
        fd.write('Variation of Information Merge: {}\n'.format(vi[0]))
        fd.write('Variation of Information Split: {}\n'.format(vi[1]))