def CalculatePerBlockStatistics(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # create the output directory if it does not exist
    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    if not os.path.exists(statistics_directory):
        os.makedirs(statistics_directory, exist_ok=True)

    # calculate raw block statistics
    raw_seg = data.ReadRawSegmentationBlock(iz, iy, ix)
    raw_n_non_zero, raw_nlabels, raw_voxel_counts = BlockStatistics(raw_seg)
    del raw_seg

    # calculate filled block statistics
    seg = data.ReadSegmentationBlock(iz, iy, ix)
    filled_n_non_zero, filled_nlabels, filled_voxel_counts = BlockStatistics(seg)
    del seg

    # the raw and filled segmentations must contain the same labels
    assert (filled_nlabels == raw_nlabels)

    # the number of filled voxels is the difference between the non-zero counts
    nfilled_voxels = filled_n_non_zero - raw_n_non_zero

    # create a dictionary for saving
    statistics = {}

    statistics['nlabels'] = raw_nlabels
    statistics['raw_n_non_zero'] = raw_n_non_zero
    statistics['raw_voxel_counts'] = raw_voxel_counts
    statistics['filled_n_non_zero'] = filled_n_non_zero
    statistics['filled_voxel_counts'] = filled_voxel_counts
    statistics['nfilled_voxels'] = nfilled_voxels

    statistics_filename = '{}/{:04d}z-{:04d}y-{:04d}x.pickle'.format(statistics_directory, iz, iy, ix)
    PickleData(statistics, statistics_filename)

    total_time = time.time() - total_time

    print('Total Time: {:0.2f} seconds.'.format(total_time))
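# A minimal driver sketch (not part of the original pipeline): iterate over every
# block in the dataset and compute its per-block statistics. The block iteration
# bounds follow the same Start/End convention used by CalculateSomataStatistics
# and CombineStatistics below; the function name itself is hypothetical.
def CalculateAllPerBlockStatistics(data):
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                CalculatePerBlockStatistics(data, iz, iy, ix)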
def CalculateSomataStatistics(meta_filename):
    data = ReadMetaData(meta_filename)

    somata_statistics = {}

    # iterate over all blocks
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                print('{} {:04d}z-{:04d}y-{:04d}x'.format(meta_filename, iz, iy, ix))

                # some datasets have no somata (default value)
                upsampled_non_zero_voxels = 0

                if data.SomataDownsampleRate():
                    somata = data.ReadSomataBlock(iz, iy, ix)

                    # get the number of non-zero voxels
                    non_zero_voxels = np.count_nonzero(somata)

                    # the upsample factor is the number of voxels at full resolution
                    # that correspond to one voxel at the downsampled resolution
                    upsample_factor = data.SomataDownsampleRate() ** 3

                    # the number of voxels masked at full resolution
                    upsampled_non_zero_voxels = upsample_factor * non_zero_voxels

                somata_statistics[(iz, iy, ix)] = upsampled_non_zero_voxels

    # create the output directory if it does not exist
    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    if not os.path.exists(statistics_directory):
        os.makedirs(statistics_directory, exist_ok=True)

    statistics_filename = '{}/somata-statistics.pickle'.format(statistics_directory)
    PickleData(somata_statistics, statistics_filename)
def EvaluateGeodesicDistances(data, label):
    # get the resolution of this data
    resolution = data.Resolution()

    # get the distance attributes filename
    distances_directory = '{}/distances'.format(data.SkeletonOutputDirectory())
    distance_filename = '{}/{:016d}.pts'.format(distances_directory, label)

    # skip over labels not processed
    if not os.path.exists(distance_filename):
        return

    # read the distance attributes
    distances, input_label = ReadAttributePtsFile(data, distance_filename)
    assert (input_label == label)

    # get the synapses filename
    synapses_filename = '{}/synapses/{:016d}.pts'.format(data.TempDirectory(), label)
    if not os.path.exists(synapses_filename):
        return

    synapses, _ = ReadPtsFile(data, synapses_filename)
    synapses = synapses[label]

    # get the somata surface filename
    somata_surface_filename = '{}/somata_surfaces/{:016d}.pts'.format(data.TempDirectory(), label)
    if not os.path.exists(somata_surface_filename):
        return

    somata_surfaces, _ = ReadPtsFile(data, somata_surface_filename)
    somata_surface = somata_surfaces[label]
    npoints = len(somata_surface)

    # if there are no surface points, there is nothing to evaluate
    if not npoints:
        return

    # convert the somata surface into a numpy point cloud
    np_point_cloud = np.zeros((npoints, 3), dtype=np.float32)
    for index, iv in enumerate(somata_surface):
        # convert the index into indices
        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # set the point cloud value according to the resolution
        np_point_cloud[index, :] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

    # create an empty dictionary for all results
    results = {}

    # keep track of all errors for this label
    results['diffs'] = []
    results['euclidean'] = 0
    results['geodesic'] = 0

    for iv in synapses:
        # get the estimated distance at this synapse point
        distance = distances[iv]

        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # convert the coordinates into a 2d vector with the resolutions
        vec = np.zeros((1, 3), dtype=np.float32)
        vec[0, :] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

        # get the min distance from this point to the surface (euclidean distance)
        euclidean_distance = scipy.spatial.distance.cdist(np_point_cloud, vec).min()

        # geodesic distances can be less than euclidean distances only when the synapse
        # is on the cell body surface but downsampling causes a disconnect between the
        # assumed surface and the cell body surface. skip these trivial points
        if (distance < euclidean_distance):
            continue

        results['diffs'].append(abs(distance - euclidean_distance))
        results['euclidean'] += euclidean_distance
        results['geodesic'] += distance

    # skip over labels with too few evaluated synapses
    if len(results['diffs']) < 2:
        return

    # output the differences, euclidean, and geodesic distances
    tmp_distances_directory = '{}/results/distances'.format(data.TempDirectory())
    if not os.path.exists(tmp_distances_directory):
        os.makedirs(tmp_distances_directory, exist_ok=True)
    output_filename = '{}/{:016d}.pickle'.format(tmp_distances_directory, label)
    PickleData(results, output_filename)
def EvaluateWidths(data, label):
    # get the resolution of this data
    resolution = data.Resolution()

    # get the width attributes filename
    widths_directory = '{}/widths'.format(data.SkeletonOutputDirectory())
    width_filename = '{}/{:016d}.pts'.format(widths_directory, label)

    # skip over labels not processed
    if not os.path.exists(width_filename):
        return

    # read the width attributes
    widths, input_label = ReadAttributePtsFile(data, width_filename)
    assert (input_label == label)

    # get the surface filename
    surfaces_filename = '{}/{:016d}.pts'.format(data.SurfacesDirectory(), label)

    # read the surfaces, ignore local coordinates
    surfaces, _ = ReadPtsFile(data, surfaces_filename)
    surface = surfaces[label]
    npoints = len(surface)

    # convert the surface into a numpy point cloud
    np_point_cloud = np.zeros((npoints, 3), dtype=np.float32)
    for index, iv in enumerate(surface):
        # convert the index into indices
        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # set the point cloud value according to the resolutions
        np_point_cloud[index, :] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

    # create an empty dictionary for all results
    results = {}

    # keep track of all errors for this label
    results['errors'] = []
    results['estimates'] = 0
    results['ground_truths'] = 0

    # iterate over all skeleton points
    for iv in widths.keys():
        # get the estimated width at this location
        width = widths[iv]

        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # convert the coordinates into a 2d vector with the resolutions
        vec = np.zeros((1, 3), dtype=np.float32)
        vec[0, :] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

        # get the min distance from this point to the surface (true width)
        min_distance = scipy.spatial.distance.cdist(np_point_cloud, vec).min()

        results['errors'].append(abs(width - min_distance))
        results['estimates'] += width
        results['ground_truths'] += min_distance

    # skip over vacuous skeletons
    if len(results['errors']) < 2:
        return

    # output the errors, estimates, and ground truths to a pickled file
    tmp_widths_directory = '{}/results/widths'.format(data.TempDirectory())
    if not os.path.exists(tmp_widths_directory):
        os.makedirs(tmp_widths_directory, exist_ok=True)
    output_filename = '{}/{:016d}.pickle'.format(tmp_widths_directory, label)
    PickleData(results, output_filename)
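# A minimal evaluation driver sketch (not part of the original code): run both
# per-label evaluations over a list of labels. Where the label list comes from is
# an assumption here; since both functions return early when a label's attribute
# files were never written, iterating over a superset of processed labels is safe.
def EvaluateAllLabels(data, labels):
    for label in labels:
        EvaluateWidths(data, label)
        EvaluateGeodesicDistances(data, label)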
def CombineStatistics(data):
    # start timing statistics
    total_time = time.time()

    # the statistics directory must already exist from previous results
    statistics_directory = '{}/statistics'.format(data.TempDirectory())

    label_volumes_with_holes = {}
    label_volumes_filled = {}
    label_volumes = {}

    neuronal_volume_with_holes = 0
    neuronal_volume = 0

    # read the pickle file generated for each block
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                statistics_filename = '{}/{:04d}z-{:04d}y-{:04d}x.pickle'.format(statistics_directory, iz, iy, ix)
                statistics = ReadPickledData(statistics_filename)

                for label in statistics['raw_voxel_counts'].keys():
                    if not label in label_volumes_with_holes:
                        label_volumes_with_holes[label] = 0
                        label_volumes[label] = 0

                    label_volumes_with_holes[label] += statistics['raw_voxel_counts'][label]
                    label_volumes[label] += statistics['filled_voxel_counts'][label]

                neuronal_volume_with_holes += statistics['raw_n_non_zero']
                neuronal_volume += statistics['filled_n_non_zero']

    labels = label_volumes.keys()

    for label in labels:
        label_volume = label_volumes[label]
        label_volume_filled = label_volume - label_volumes_with_holes[label]

        print('Label {}:'.format(label))
        print('  Volume:        {:14d}'.format(label_volume))
        print('  Filled Volume: {:14d} ({:5.2f}%)\n'.format(label_volume_filled, 100 * label_volume_filled / label_volume))

        # add to the dictionary of filled volumes
        label_volumes_filled[label] = label_volume_filled

    # calculate what percent of the total volume of holes was filled
    neuronal_volume_filled = neuronal_volume - neuronal_volume_with_holes

    total_volume = data.NVoxels()

    print('Volume Size:     {:14d}'.format(total_volume))
    print('  Neuron Volume: {:14d} ({:5.2f}%)'.format(neuronal_volume, 100 * neuronal_volume / total_volume))
    print('  Filled Volume: {:14d} ({:5.2f}%)'.format(neuronal_volume_filled, 100 * neuronal_volume_filled / neuronal_volume))

    # output the aggregated data to a pickle file
    statistics = {}

    statistics['label_volumes'] = label_volumes
    statistics['label_volumes_with_holes'] = label_volumes_with_holes
    statistics['label_volumes_filled'] = label_volumes_filled
    statistics['neuronal_volume'] = neuronal_volume
    statistics['neuronal_volume_with_holes'] = neuronal_volume_with_holes
    statistics['neuronal_volume_filled'] = neuronal_volume_filled

    statistics_filename = '{}/combined-statistics.pickle'.format(statistics_directory)
    PickleData(statistics, statistics_filename)

    total_time = time.time() - total_time

    print('Total Time: {:0.2f} seconds.'.format(total_time))
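# A hole-filling statistics workflow sketch (assumed ordering, inferred from the
# file layout above): every per-block pickle must exist before CombineStatistics
# aggregates them, since it reads one '{:04d}z-{:04d}y-{:04d}x.pickle' file per
# block. The function name and meta filename below are hypothetical;
# CalculateAllPerBlockStatistics is the driver sketch defined above.
def RunHoleFillingStatistics(meta_filename):
    data = ReadMetaData(meta_filename)
    # per-block pass: one statistics pickle per block
    CalculateAllPerBlockStatistics(data)
    # aggregation pass over all per-block pickles
    CombineStatistics(data)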
def FindPerBlockConnectedComponents(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # get the number of blocks in each dimension
    nblocks = data.NBlocks()
    block_volume = data.BlockVolume()

    # get the index for this block
    block_index = data.IndexFromIndices(iz, iy, ix)

    # get the starting label for the background volumes
    background_start_label = -1 - (block_index * block_volume)

    # read in this volume
    read_time = time.time()
    seg = data.ReadRawSegmentationBlock(iz, iy, ix)
    read_time = time.time() - read_time

    # make sure the block is not larger than mentioned in the param file
    assert (seg.shape[OR_Z] <= data.BlockZLength())
    assert (seg.shape[OR_Y] <= data.BlockYLength())
    assert (seg.shape[OR_X] <= data.BlockXLength())

    # pad the block with zeroes at the ends
    if seg.shape[OR_Z] < data.BlockZLength() or seg.shape[OR_Y] < data.BlockYLength() or seg.shape[OR_X] < data.BlockXLength():
        # make sure that the block is on one of the far edges
        assert (iz == data.EndZ() - 1 or iy == data.EndY() - 1 or ix == data.EndX() - 1)

        zpadding = data.BlockZLength() - seg.shape[OR_Z]
        ypadding = data.BlockYLength() - seg.shape[OR_Y]
        xpadding = data.BlockXLength() - seg.shape[OR_X]

        # padding only goes at the far edges of the block
        seg = np.pad(seg, ((0, zpadding), (0, ypadding), (0, xpadding)), 'constant', constant_values=0)

    # make sure the block is not smaller than mentioned in the param file
    assert (seg.shape[OR_Z] == data.BlockZLength())
    assert (seg.shape[OR_Y] == data.BlockYLength())
    assert (seg.shape[OR_X] == data.BlockXLength())

    # call the connected components algorithm for this block
    components_time = time.time()
    components = ComputeConnected6Components(seg, background_start_label)

    # delete the original segmentation
    del seg

    # save the components file to disk
    tmp_directory = data.TempBlockDirectory(iz, iy, ix)

    # create the folder if it does not exist
    if not os.path.exists(tmp_directory):
        os.makedirs(tmp_directory, exist_ok=True)

    # write the components and all walls to file
    WriteH5File(components, '{}/components.h5'.format(tmp_directory))
    WriteH5File(components[0, :, :], '{}/z-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[-1, :, :], '{}/z-max-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, 0, :], '{}/y-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, -1, :], '{}/y-max-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, :, 0], '{}/x-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, :, -1], '{}/x-max-hole-filling.h5'.format(tmp_directory))
    components_time = time.time() - components_time

    # find the set of adjacent labels, both inside the volume and the ones connected at the local border
    adjacency_set_time = time.time()
    neighbor_label_set = FindAdjacentLabelSetLocal(components)
    adjacency_set_time = time.time() - adjacency_set_time

    # create a dictionary of labels from the set
    background_associated_labels_time = time.time()
    neighbor_label_dict = Set2Dictionary(neighbor_label_set)

    # to start, none of the background components are determined
    undetermined_label_set = set(neighbor_label_dict.keys())

    # dictionary associating background components to labels
    associated_label_dict = Dict.empty(key_type=types.int64, value_type=types.int64)

    associated_label_dict, undetermined_label_set, holes, non_holes = FindBackgroundComponentsAssociatedLabels(neighbor_label_dict, undetermined_label_set, associated_label_dict)
    background_associated_labels_time = time.time() - background_associated_labels_time

    # remove from the neighbor label set border elements and those already determined as holes and non-holes
    neighbor_label_set_reduced = PruneNeighborLabelSet(neighbor_label_set, holes, non_holes)
    neighbor_label_dict_reduced = Set2Dictionary(neighbor_label_set_reduced)

    # delete the temporarily generated set and dictionary
    del neighbor_label_set, neighbor_label_dict

    # write the relevant files to disk
    write_time = time.time()
    PickleNumbaData(associated_label_dict, '{}/associated-label-set-local.pickle'.format(tmp_directory))
    PickleData(undetermined_label_set, '{}/undetermined-label-set-local.pickle'.format(tmp_directory))
    PickleData(neighbor_label_dict_reduced, '{}/neighbor-label-dictionary-reduced.pickle'.format(tmp_directory))
    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Read Time: {:0.2f} seconds.'.format(read_time))
    print('Components Time: {:0.2f} seconds.'.format(components_time))
    print('Adjacency Set Time: {:0.2f} seconds.'.format(adjacency_set_time))
    print('Background Components Associated Labels: {:0.2f} seconds.'.format(background_associated_labels_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # generate statistics for the holes
    # does not count towards total computation time
    labels, counts = np.unique(components, return_counts=True)

    hole_sizes = {}
    for iv, label in enumerate(labels):
        # skip the actual neurons in the volume
        if label > 0:
            continue
        hole_sizes[label] = counts[iv]

    # save the output file
    PickleData(hole_sizes, '{}/hole-sizes.pickle'.format(tmp_directory))

    # delete the components (no longer needed)
    del components

    # output timing statistics
    timing_directory = '{}/connected-components'.format(data.TimingDirectory())
    if not os.path.exists(timing_directory):
        os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/{:04d}z-{:04d}y-{:04d}x.txt'.format(timing_directory, iz, iy, ix)
    with open(timing_filename, 'w') as fd:
        fd.write('Read Time: {:0.2f} seconds.\n'.format(read_time))
        fd.write('Components Time: {:0.2f} seconds.\n'.format(components_time))
        fd.write('Adjacency Set Time: {:0.2f} seconds.\n'.format(adjacency_set_time))
        fd.write('Background Components Associated Labels: {:0.2f} seconds.\n'.format(background_associated_labels_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
def ConnectLabelsAcrossBlocks(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # find all of the adjacent components across the boundaries
    adjacency_set_time = time.time()

    # create an empty set of adjacencies
    neighbor_label_set_global = set()

    # add a fake tuple for numba to know the fingerprint
    neighbor_label_set_global.add((BORDER_CONTACT, BORDER_CONTACT))

    # get the temporary directory for this dataset
    tmp_directory = data.TempBlockDirectory(iz, iy, ix)

    # this block occurs at the minimum in the z direction
    if iz == data.StartZ():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'z', 'min')
    # this block occurs at the minimum in the y direction
    if iy == data.StartY():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'y', 'min')
    # this block occurs at the minimum in the x direction
    if ix == data.StartX():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'x', 'min')

    # this block occurs at the maximum in the z direction
    if iz == data.EndZ() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'z', 'max')
    # this block has a neighbor in the positive z direction
    else:
        neighbor_label_set_global = ConnectBlocks(data, neighbor_label_set_global, iz, iy, ix, 'z')

    # this block occurs at the maximum in the y direction
    if iy == data.EndY() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'y', 'max')
    # this block has a neighbor in the positive y direction
    else:
        neighbor_label_set_global = ConnectBlocks(data, neighbor_label_set_global, iz, iy, ix, 'y')

    # this block occurs at the maximum in the x direction
    if ix == data.EndX() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(neighbor_label_set_global, tmp_directory, 'x', 'max')
    # this block has a neighbor in the positive x direction
    else:
        neighbor_label_set_global = ConnectBlocks(data, neighbor_label_set_global, iz, iy, ix, 'x')

    # remove the fake tuple from the set
    neighbor_label_set_global.remove((BORDER_CONTACT, BORDER_CONTACT))

    adjacency_set_time = time.time() - adjacency_set_time

    # write the relevant files to disk
    write_time = time.time()
    PickleData(neighbor_label_set_global, '{}/neighbor-label-set-global.pickle'.format(tmp_directory))
    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Adjacency Set Time: {:0.2f} seconds.'.format(adjacency_set_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # output timing statistics
    timing_directory = '{}/connect-labels-across-blocks'.format(data.TimingDirectory())
    if not os.path.exists(timing_directory):
        os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/{:04d}z-{:04d}y-{:04d}x.txt'.format(timing_directory, iz, iy, ix)
    with open(timing_filename, 'w') as fd:
        fd.write('Adjacency Set Time: {:0.2f} seconds.\n'.format(adjacency_set_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
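# A minimal driver sketch for the connected-components stage (assumed ordering,
# inferred from the files the two functions exchange): every block must write its
# local components and border-wall files before any block connects labels across
# boundaries, since ConnectBlocks appears to read the walls of neighboring blocks.
# The function name is hypothetical.
def RunConnectedComponentsStage(data):
    # first pass: per-block connected components and border walls
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                FindPerBlockConnectedComponents(data, iz, iy, ix)

    # second pass: connect labels across block boundaries
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                ConnectLabelsAcrossBlocks(data, iz, iy, ix)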