def SNEMIPointCloud(prefix):
    filename = 'raw_data/segmentations/{}/seg.h5'.format(prefix)

    # open this h5 file
    with h5py.File(filename, 'r') as hf:
        # use np.array to decompress
        keys = [key for key in hf.keys()]
        data = np.array(hf[keys[0]])

    # verify the resolutions match
    zres, yres, xres = dataIO.GridSize(prefix)
    assert (zres == data.shape[OR_Z] and yres == data.shape[OR_Y] and xres == data.shape[OR_X])

    # get all of the non-zero points in a list
    point_clouds = ExtractSNEMIPointClouds(data)

    for label, point_cloud in enumerate(point_clouds):
        # skip over missing elements from these slices
        if not len(point_cloud): continue

        # write the point cloud to file
        output_filename = 'segmentations/{}/{:06d}.pts'.format(prefix, label)

        with open(output_filename, 'wb') as fd:
            npoints = len(point_cloud)
            fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
            fd.write(struct.pack('%sq' % npoints, *point_cloud))
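# Every function in this file emits the same binary .pts layout: a header of
# four int64 values (zres, yres, xres, npoints) followed by npoints int64
# linear indices. A minimal reader sketch for that format, assuming only what
# the struct.pack calls above imply (ReadPts is a hypothetical helper, not
# part of dataIO):
def ReadPts(filename):
    with open(filename, 'rb') as fd:
        # header: grid size and the number of points that follow
        zres, yres, xres, npoints = struct.unpack('qqqq', fd.read(32))
        # the remaining bytes are npoints 8-byte linear indices
        points = list(struct.unpack('%sq' % npoints, fd.read(8 * npoints)))
    return (zres, yres, xres), points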
def H5Section2PointCloud(prefix, filename, section_index, section_width):
    # get the grid size to record in the saved files
    zres, yres, xres = dataIO.GridSize(prefix)

    # open this file and read data
    with h5py.File(filename, 'r') as hf:
        data = np.array(hf['main'])

    # the z index of this section usually does not start at zero
    z_start = section_width * section_index

    point_clouds = SectionExtractPointCloud(data, z_start)

    # create the sections directory if it does not already exist
    if not os.path.isdir('original_data/segmentations/{}/sections'.format(prefix)):
        os.mkdir('original_data/segmentations/{}/sections'.format(prefix))

    for label, point_cloud in enumerate(point_clouds):
        # skip over missing elements from these slices
        if not len(point_cloud): continue

        # write the point cloud to file
        output_filename = 'original_data/segmentations/{}/sections/section-{:03d}-label-{:06d}.pts'.format(prefix, section_index, label)

        with open(output_filename, 'wb') as fd:
            npoints = len(point_cloud)
            fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
            fd.write(struct.pack('%sq' % npoints, *point_cloud))
def CorrectIsthmusEndpoints(prefix, method):
    # go through every label for this method and dataset
    directory = '{}/{}'.format(method, prefix)

    labels = []
    for filename in sorted(os.listdir(directory)):
        if 'endpoints' in filename: continue
        labels.append(int(filename[:-4]))

    for label in labels:
        filename = '{}/{}/{:06d}.pts'.format(method, prefix, label)
        print(filename)

        with open(filename, 'rb') as fd:
            zres, yres, xres, npoints = struct.unpack('qqqq', fd.read(32))
            points = list(struct.unpack('%sq' % npoints, fd.read(8 * npoints)))

        # some points are stored negated (presumably flags from upstream
        # processing); take absolute values to recover the linear indices
        for ip in range(npoints):
            if points[ip] < 0: points[ip] = -1 * points[ip]

        endpoints = CreateEndpoints(set(points), zres, yres, xres)

        output_filename = '{}/{}/{:06d}-endpoints.pts'.format(method, prefix, label)

        with open(output_filename, 'wb') as fd:
            nendpoints = len(endpoints)
            fd.write(struct.pack('qqqq', zres, yres, xres, nendpoints))
            fd.write(struct.pack('%sq' % nendpoints, *endpoints))
def JWRSomae(label):
    prefix = 'JWR'

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # get the original filename
    filename = 'raw_data/somae/JWR/cell{:03d}_d.txt'.format(label)

    with open(filename, 'r') as fd:
        # remove the new line and split the comma-separated coordinates
        line = fd.readline().strip().split(',')
        ix = int(line[0])
        iy = int(line[1])
        iz = int(line[2])

    # convert to linear index
    iv = iz * yres * xres + iy * xres + ix

    output_filename = 'somae/JWR/{:06d}.pts'.format(label)

    with open(output_filename, 'wb') as fd:
        npoints = 1
        fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
        fd.write(struct.pack('q', iv))
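# The conversion between linear indices and (iz, iy, ix) coordinates is
# repeated inline throughout this file. Hypothetical helpers that mirror that
# arithmetic, shown here only to make the convention explicit:
def IndexToCoordinates(iv, yres, xres):
    # invert iv = iz * yres * xres + iy * xres + ix
    iz = iv // (yres * xres)
    iy = (iv - iz * yres * xres) // xres
    ix = iv % xres
    return iz, iy, ix

def CoordinatesToIndex(iz, iy, ix, yres, xres):
    return iz * yres * xres + iy * xres + ix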
def SNEMISynapses(prefix):
    # read in the synapse h5 file
    with h5py.File('raw_data/synapses/{}/synapses.h5'.format(prefix), 'r') as hf:
        keys = [key for key in hf.keys()]
        syn_data = np.array(hf[keys[0]])

    # read in all of the synapses
    syn_per_seg = dataIO.ReadAllPoints(prefix, 'synapses')

    # get the grid size
    zres, yres, xres = dataIO.GridSize(prefix)

    # make sure that every location actually falls on a synapse
    for segment in syn_per_seg:
        synapses = syn_per_seg[segment]
        for synapse in synapses:
            # convert the linear index back to (iz, iy, ix)
            iz = synapse // (yres * xres)
            iy = (synapse - iz * yres * xres) // xres
            ix = synapse % xres

            # make sure this is a non-zero location
            assert (syn_data[iz, iy, ix])

    # read in the segmentation h5 file
    with h5py.File('raw_data/segmentations/{}/seg.h5'.format(prefix), 'r') as hf:
        keys = [key for key in hf.keys()]
        seg_data = np.array(hf[keys[0]])

    syn_seg_pairs = set()
    syn_seg_found = set()

    # make sure every (synapse, segment) pair occurs once and only once
    for iz in range(zres):
        for iy in range(yres):
            for ix in range(xres):
                synapse = syn_data[iz, iy, ix]
                segment = seg_data[iz, iy, ix]

                # skip background data
                if not synapse or not segment: continue

                syn_seg_pairs.add((synapse, segment))

                # see if this point is in the list of synapses
                iv = iz * yres * xres + iy * xres + ix
                if iv in syn_per_seg[segment]:
                    # make sure there is only one element for this pair
                    assert (not (synapse, segment) in syn_seg_found)

                    # add this pair to the list of found locations
                    syn_seg_found.add((synapse, segment))

    for (synapse, segment) in syn_seg_pairs:
        assert ((synapse, segment) in syn_seg_found)
def CreateVolumetricSomae(prefix, label, subset):
    somae_filename = 'raw_data/somae/{}/cell{:03d}_d.h5'.format(prefix, label)
    if not os.path.exists(somae_filename): return

    segment_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(segment_filename): return

    # the somae are saved at a lower resolution than the segmentations
    if prefix == 'JWR': downz, downy, downx = 4, 4, 4
    elif prefix == 'Zebrafinch': downz, downy, downx = 8, 8, 8

    # get the grid size
    zres, yres, xres = dataIO.GridSize(prefix)

    with h5py.File(somae_filename, 'r') as hf:
        somae = np.array(hf['main'])

    # read all segment points for this subset (e.g., 'surfaces')
    point_cloud = dataIO.ReadPoints(prefix, label, subset)

    somae_point_cloud = set()

    low_zres, low_yres, low_xres = somae.shape

    for index in point_cloud:
        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # downsample the coordinates to the somae resolution
        zdown = iz // downz
        ydown = iy // downy
        xdown = ix // downx

        # bounds checking
        if low_zres - 1 < zdown: zdown = low_zres - 1
        if low_yres - 1 < ydown: ydown = low_yres - 1
        if low_xres - 1 < xdown: xdown = low_xres - 1

        # keep only the points that fall inside the soma mask
        if somae[zdown, ydown, xdown]:
            somae_point_cloud.add(index)

    somae_point_cloud = list(somae_point_cloud)

    output_filename = 'volumetric_somae/{}/{}/{:06d}.pts'.format(subset, prefix, label)

    with open(output_filename, 'wb') as fd:
        nsegment_points = len(somae_point_cloud)
        fd.write(struct.pack('qqqq', zres, yres, xres, nsegment_points))
        fd.write(struct.pack('%sq' % nsegment_points, *somae_point_cloud))
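# Example invocation, assuming the subset argument names the point cloud
# directory whose points are filtered against the soma mask (the label value
# here is hypothetical):
#   CreateVolumetricSomae('JWR', 1, 'surfaces')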
def Segment2Surface(prefix, label):
    if not os.path.exists('segmentations/{}/{:06d}.pts'.format(prefix, label)): return

    # get the grid size for this prefix
    zres, yres, xres = dataIO.GridSize(prefix)

    point_cloud = set(dataIO.ReadPoints(prefix, label, 'segmentations'))

    surface_points = FindSurface(point_cloud, zres, yres, xres)

    surface_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)

    with open(surface_filename, 'wb') as fd:
        nsurface_points = len(surface_points)
        fd.write(struct.pack('qqqq', zres, yres, xres, nsurface_points))
        fd.write(struct.pack('%sq' % nsurface_points, *surface_points))
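# FindSurface is defined elsewhere; a plausible sketch under the assumption
# that a surface voxel is any voxel of the point cloud with at least one
# missing 6-connected neighbor (the actual implementation may differ):
def FindSurfaceSketch(point_cloud, zres, yres, xres):
    surface_points = []
    for iv in point_cloud:
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres

        # check the six face-adjacent neighbors; voxels on the volume boundary
        # or next to a voxel outside the point cloud belong to the surface
        for (dz, dy, dx) in ((-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, -1), (0, 0, 1)):
            nz, ny, nx = iz + dz, iy + dy, ix + dx
            if not (0 <= nz < zres and 0 <= ny < yres and 0 <= nx < xres):
                surface_points.append(iv)
                break
            if not (nz * yres * xres + ny * xres + nx) in point_cloud:
                surface_points.append(iv)
                break
    return surface_points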
def CombineSectionPointClouds(prefix):
    # get the grid size for this prefix
    zres, yres, xres = dataIO.GridSize(prefix)

    sub_directory = 'original_data/segmentations/{}/sections'.format(prefix)

    # get the maximum label from the filenames
    max_label = max(int(filename.split('-')[-1][:-4]) for filename in os.listdir(sub_directory)) + 1

    # go through every label
    for label in range(max_label):
        filenames = sorted(glob.glob('{}/*-label-{:06d}.pts'.format(sub_directory, label)))
        if not len(filenames): continue

        # write the point cloud to this final file
        output_filename = 'original_data/segmentations/{}/{:06d}.pts'.format(prefix, label)

        with open(output_filename, 'wb') as wfd:
            # write a placeholder number of points first and overwrite it later
            npoints = 0
            wfd.write(struct.pack('qqqq', zres, yres, xres, npoints))

            # append the points from every section file for this label
            for filename in filenames:
                with open(filename, 'rb') as rfd:
                    section_zres, section_yres, section_xres, section_npoints = struct.unpack('qqqq', rfd.read(32))
                    point_cloud = struct.unpack('%sq' % section_npoints, rfd.read(8 * section_npoints))

                wfd.write(struct.pack('%sq' % section_npoints, *point_cloud))
                npoints += section_npoints

            # rewrite the header now that we know the true number of points
            wfd.seek(0)
            wfd.write(struct.pack('qqqq', zres, yres, xres, npoints))
def JWRPointCloud(label):
    filename = 'raw_data/segmentations/JWR/cell{:03d}_d.h5'.format(label)

    # open this h5 file
    with h5py.File(filename, 'r') as hf:
        # use np.array to decompress
        keys = [key for key in hf.keys()]
        data = np.array(hf[keys[0]])

    # verify the resolutions match
    zres, yres, xres = dataIO.GridSize('JWR')
    assert (zres == data.shape[OR_Z] and yres == data.shape[OR_Y] and xres == data.shape[OR_X])

    # get all of the non-zero points in a list
    point_cloud = ExtractJWRPointCloud(data)

    # write the point cloud to file
    output_filename = 'original_data/segmentations/JWR/{:06d}.pts'.format(label)

    with open(output_filename, 'wb') as fd:
        npoints = len(point_cloud)
        fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
        fd.write(struct.pack('%sq' % npoints, *point_cloud))
def Fib25Synapses(start_index):
    prefix = 'Fib25'

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # get the labels for this dataset
    labels = [int(label[:-4]) for label in sorted(os.listdir('segmentations/{}'.format(prefix)))]

    synapses_per_segment = {}
    for label in labels:
        synapses_per_segment[label] = []

    pre_filename = 'raw_data/synapses/Fib25/synapse_gid_ffn_pre_v1.txt'
    with open(pre_filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            # get the id and location
            segment = int(line[0]) + 1
            iz = int(line[1])
            iy = int(line[2])
            ix = int(line[3])

            # verify input
            assert (0 <= ix < xres)
            assert (0 <= iy < yres)
            assert (0 <= iz < zres)

            # some synapses belong to segments without point clouds
            if not segment in synapses_per_segment: continue

            iv = iz * yres * xres + iy * xres + ix
            synapses_per_segment[segment].append(iv)

    post_filename = 'raw_data/synapses/Fib25/synapse_gid_ffn_post_v1.txt'
    with open(post_filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            # get the id and location
            segment = int(line[1]) + 1
            iz = int(line[2])
            iy = int(line[3])
            ix = int(line[4])

            # verify input
            assert (0 <= ix < xres)
            assert (0 <= iy < yres)
            assert (0 <= iz < zres)

            # some synapses belong to segments without point clouds
            if not segment in synapses_per_segment: continue

            iv = iz * yres * xres + iy * xres + ix
            synapses_per_segment[segment].append(iv)

    # save all synapses for each label
    for segment in synapses_per_segment:
        output_filename = 'synapses/Fib25/{:06d}.pts'.format(segment)
        if os.path.exists(output_filename): continue
        if segment < start_index: continue

        start_time = time.time()

        # snap every synapse to the closest point on the segment surface
        surface_point_cloud = dataIO.ReadPoints(prefix, segment, 'surfaces')
        npoints = len(surface_point_cloud)

        np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
        for index, iv in enumerate(surface_point_cloud):
            iz = iv // (yres * xres)
            iy = (iv - iz * yres * xres) // xres
            ix = iv % xres

            np_point_cloud[index, :] = (ix, iy, iz)

        synapses = []
        total_distance = 0.0

        for synapse in synapses_per_segment[segment]:
            iz = synapse // (yres * xres)
            iy = (synapse - iz * yres * xres) // xres
            ix = synapse % xres

            # create a 2D vector for this point
            vec = np.zeros((1, 3), dtype=np.int32)
            vec[0, :] = (ix, iy, iz)

            closest_point = surface_point_cloud[scipy.spatial.distance.cdist(np_point_cloud, vec).argmin()]

            point_iz = closest_point // (yres * xres)
            point_iy = (closest_point - point_iz * yres * xres) // xres
            point_ix = closest_point % xres

            distance = math.sqrt((ix - point_ix) * (ix - point_ix) + (iy - point_iy) * (iy - point_iy) + (iz - point_iz) * (iz - point_iz))

            # skip over clearly wrong synapses
            if distance > 30: continue

            total_distance += distance
            synapses.append(closest_point)

        if not len(synapses): continue

        with open(output_filename, 'wb') as fd:
            nsynapses = len(synapses)
            fd.write(struct.pack('qqqq', zres, yres, xres, nsynapses))
            fd.write(struct.pack('%sq' % nsynapses, *synapses))

        print('Average Distance {:0.2f} for label {} in {:0.2f} seconds'.format(total_distance / len(synapses), segment, time.time() - start_time))
def JWRandZebrafinchSynapses(prefix, label):
    # skip if no surface filename
    surface_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(surface_filename): return

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # JWR has a downsampled segmentation
    if prefix == 'JWR':
        downsample_rate = (1, 8, 8)
        xyz_coordinates = True
    else:
        downsample_rate = (1, 1, 1)
        xyz_coordinates = False

    start_time = time.time()

    # get the original filename
    if prefix == 'JWR':
        filename = 'raw_data/synapses/JWR/cell{:03d}_d.txt'.format(label)
    else:
        filename = 'raw_data/synapses/Zebrafinch/syn_{:04d}.txt'.format(label)
    if not os.path.exists(filename): return

    # read the surface points for this label and convert to numpy array
    surface_point_cloud = dataIO.ReadPoints(prefix, label, 'surfaces')
    npoints = len(surface_point_cloud)

    np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
    for index, iv in enumerate(surface_point_cloud):
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres

        np_point_cloud[index, :] = (ix, iy, iz)

    synapses = []
    total_distance = 0.0

    with open(filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            if xyz_coordinates:
                ix = int(line[0]) // downsample_rate[OR_X]
                iy = int(line[1]) // downsample_rate[OR_Y]
                iz = int(line[2]) // downsample_rate[OR_Z]
            else:
                iz = int(line[0]) // downsample_rate[OR_Z]
                iy = int(line[1]) // downsample_rate[OR_Y]
                ix = int(line[2]) // downsample_rate[OR_X]

            # create a 2D vector for this point
            vec = np.zeros((1, 3), dtype=np.int32)
            vec[0, :] = (ix, iy, iz)

            closest_point = surface_point_cloud[scipy.spatial.distance.cdist(np_point_cloud, vec).argmin()]

            point_iz = closest_point // (yres * xres)
            point_iy = (closest_point - point_iz * yres * xres) // xres
            point_ix = closest_point % xres

            distance = math.sqrt((ix - point_ix) * (ix - point_ix) + (iy - point_iy) * (iy - point_iy) + (iz - point_iz) * (iz - point_iz))

            # skip over clearly wrong synapses
            if distance > 30: continue

            total_distance += distance
            synapses.append(closest_point)

    # avoid writing empty files and dividing by zero below
    if not len(synapses): return

    output_filename = 'synapses/{}/{:06d}.pts'.format(prefix, label)

    with open(output_filename, 'wb') as fd:
        nsynapses = len(synapses)
        fd.write(struct.pack('qqqq', zres, yres, xres, nsynapses))
        fd.write(struct.pack('%sq' % nsynapses, *synapses))

    print('Average Distance {:0.2f} for label {} in {:0.2f} seconds'.format(total_distance / len(synapses), label, time.time() - start_time))
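# The nearest-surface-point queries above run a full cdist against the entire
# surface for every synapse. For large surfaces a KD-tree amortizes this; a
# sketch of the same snapping step using scipy.spatial.cKDTree (an alternative
# to the cdist approach, not what the functions above use):
def SnapToSurface(np_point_cloud, surface_point_cloud, query_points):
    # build the tree once over the surface, then query all synapse
    # coordinates together; query_points is an (M, 3) array of (x, y, z)
    tree = scipy.spatial.cKDTree(np_point_cloud)
    distances, indices = tree.query(query_points)
    return distances, [surface_point_cloud[index] for index in indices]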
def SynapseEvaluate(prefix, method, label):
    # go through every label for this method and dataset
    synapse_filename = 'synapses/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(synapse_filename): return

    endpoint_filename = '{}/{}/{:06d}.pts'.format(method, prefix, label)
    if not os.path.exists(endpoint_filename): return

    nri_filename = 'nris/{}/{}-{:06d}.txt'.format(prefix, method.replace('/', '-'), label)
    if os.path.exists(nri_filename): return

    # get the grid size and resolution
    zres, yres, xres = dataIO.GridSize(prefix)
    resolution = dataIO.Resolution(prefix)

    # maximum distance in nanometers for a valid match
    max_distance = 800

    # read the true synapse locations
    synapses = dataIO.ReadPoints(prefix, label, 'synapses')
    # read the predicted locations
    predictions = ReadPredictions(prefix, method, label)

    ngt_pts = len(synapses)
    npr_pts = len(predictions)

    gt_pts = np.zeros((ngt_pts, 3), dtype=np.int64)
    pr_pts = np.zeros((npr_pts, 3), dtype=np.int64)

    for pt in range(ngt_pts):
        # get x, y, z locations
        index = synapses[pt]
        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # coordinates are (x, y, z) in nanometers
        gt_pts[pt,0] = resolution[OR_X] * ix
        gt_pts[pt,1] = resolution[OR_Y] * iy
        gt_pts[pt,2] = resolution[OR_Z] * iz

    for pt in range(npr_pts):
        # get x, y, z locations
        index = predictions[pt]
        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # coordinates are (x, y, z) in nanometers
        pr_pts[pt,0] = resolution[OR_X] * ix
        pr_pts[pt,1] = resolution[OR_Y] * iy
        pr_pts[pt,2] = resolution[OR_Z] * iz

    # find the optimal assignment between ground truth and predicted synapses
    cost_matrix = scipy.spatial.distance.cdist(gt_pts, pr_pts)
    matching = scipy.optimize.linear_sum_assignment(cost_matrix)

    valid_matches = set()
    for match in zip(matching[0], matching[1]):
        # valid pairs must be within max_distance nanometers
        if cost_matrix[match[0], match[1]] > max_distance: continue

        valid_matches.add(match)

    ncorrect_synapses = len(valid_matches)
    nadded_synapses = npr_pts - len(valid_matches)
    nmissed_synapses = ngt_pts - len(valid_matches)

    # the number of true positives is the number of paths between the valid locations
    true_positives = ncorrect_synapses * (ncorrect_synapses - 1) // 2
    # the number of false positives is every pair of paths between true and added synapses
    false_positives = ncorrect_synapses * nadded_synapses
    # the number of false negatives is every synapse pair that is divided
    false_negatives = ncorrect_synapses * nmissed_synapses

    # the NRI is the F1 score over these path counts
    if true_positives == 0:
        nri = 0
    else:
        precision = true_positives / float(true_positives + false_positives)
        recall = true_positives / float(true_positives + false_negatives)
        nri = 2 * (precision * recall) / (precision + recall)

    with open(nri_filename, 'w') as fd:
        fd.write('{} {} {}\n'.format(true_positives, false_positives, false_negatives))
        fd.write('{}\n'.format(nri))

    return nri
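# A small worked example of the NRI computation above: with 10 ground truth
# synapses, 9 predictions, and 8 valid matches, there are 8 correct, 1 added,
# and 2 missed synapses, so
#   true_positives  = 8 * 7 // 2 = 28
#   false_positives = 8 * 1      = 8
#   false_negatives = 8 * 2      = 16
#   precision = 28 / 36, recall = 28 / 44, nri = 2pr / (p + r) ~ 0.70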
def EvaluateWidths(prefix, label):
    start_time = time.time()

    # get the resolution, surface voxels, and radii for this prefix label pair
    resolution = dataIO.Resolution(prefix)
    zres, yres, xres = dataIO.GridSize(prefix)

    # surface information
    surface_point_cloud = np.array(dataIO.ReadPoints(prefix, label, 'surfaces'), dtype=np.int64)
    npoints = len(surface_point_cloud)

    np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
    for index, iv in enumerate(surface_point_cloud):
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres

        np_point_cloud[index, :] = (resolution[OR_X] * ix, resolution[OR_Y] * iy, resolution[OR_Z] * iz)

    widths = dataIO.ReadWidths(prefix, label)
    skeletons = dataIO.ReadPoints(prefix, label, 'connectomes')

    # keep track of the error over time
    mean_absolute_error = 0.0
    count = 0

    for index in skeletons:
        # randomly sample roughly twenty percent of the skeleton points
        if random.random() < 0.80: continue

        # some of the soma locations will not be in the widths
        if not index in widths: continue

        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # create a 2D vector for this point
        vec = np.zeros((1, 3), dtype=np.int32)
        vec[0, :] = (resolution[OR_X] * ix, resolution[OR_Y] * iy, resolution[OR_Z] * iz)

        # the estimated radius should match the distance to the closest surface point
        radius = widths[index]
        minimum_distance = scipy.spatial.distance.cdist(np_point_cloud, vec).min()

        error = abs(radius - minimum_distance)

        mean_absolute_error += error
        count += 1

    print('Mean Absolute Error: {:0.2f} nanometers'.format(mean_absolute_error / count))
    print(time.time() - start_time)

    output_filename = 'width-errors/{}-{:06d}.txt'.format(prefix, label)
    with open(output_filename, 'w') as fd:
        fd.write('{} {}\n'.format(mean_absolute_error, count))