Exemplo n.º 1
0
def SkeletonCandidateGenerator(prefix, network_distance, candidates, width, augment):
    """Endlessly yield one extracted feature per candidate, cycling forever.

    Args:
        prefix: dataset identifier handed to the dataIO readers.
        network_distance: extent of the bounding box; it is divided by the
            per-axis resolution to obtain the radii in grid coordinates.
        candidates: sized, indexable collection of candidates.  Indexing an
            empty collection fails on the first iteration.
        width: forwarded unchanged to ExtractFeature.
        augment: forwarded to ExtractFeature as the ``augment`` keyword.

    Yields:
        ExtractFeature(segmentation, candidate, width, radii, augment=augment)
        for each candidate in order, wrapping around to the first candidate
        once the end is reached.
    """
    # read in all relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    world_res = dataIO.Resolution(prefix)

    # get the radii for the bounding box in grid coordinates
    radii = (network_distance / world_res[0], network_distance / world_res[1], network_distance / world_res[2])
    index = 0

    start_time = time.time()
    # progress is only reported during the first pass over the candidates
    continue_printing = True

    # continue indefinitely
    while True:
        # this prevents overflow on the queue - the repeated samples are never used
        if index >= len(candidates):
            continue_printing = False
            index = 0

        # get the current candidate
        candidate = candidates[index]

        # increment the index
        index += 1

        if continue_printing:
            # report roughly every tenth of the candidates; the interval is
            # clamped to one so that fewer than ten candidates no longer
            # causes a modulo by zero
            report_interval = max(1, len(candidates) // 10)
            if not index % report_interval:
                print('{}/{}: {}'.format(index, len(candidates), time.time() - start_time))

        # rotation equals 0
        yield ExtractFeature(segmentation, candidate, width, radii, augment=augment)
Exemplo n.º 2
0
def GenerateExamplesArray(prefix, segmentation, examples, width,
                          network_radius):
    """Build a uint8 array holding one scaled feature volume per example.

    Each example is a (zpoint, ypoint, xpoint, label_one, label_two) tuple.
    A window of the segmentation centered on the point is cut out, passed
    through ExtractExample, padded with zeros where the window would leave
    the volume, and finally resized by ScaleFeature.

    Returns:
        Array of shape (len(examples), width[IB_Z], width[IB_Y], width[IB_X])
        with dtype uint8.
    """
    # convert the network radius into a voxel radius along every axis
    voxel_size = dataIO.Resolution(prefix)
    zradius = int(network_radius / voxel_size[IB_Z])
    yradius = int(network_radius / voxel_size[IB_Y])
    xradius = int(network_radius / voxel_size[IB_X])
    zres, yres, xres = segmentation.shape

    # shape of a complete (unclipped) window around a point
    window_shape = (2 * zradius + 1, 2 * yradius + 1, 2 * xradius + 1)

    # one output slot per example
    examples_array = np.zeros(
        (len(examples), width[IB_Z], width[IB_Y], width[IB_X]),
        dtype=np.uint8)

    for index, entry in enumerate(examples):
        zpoint, ypoint, xpoint, label_one, label_two = entry

        # clip the window so that it stays inside the segmentation
        zmin, zmax = max(0, zpoint - zradius), min(zres, zpoint + zradius + 1)
        ymin, ymax = max(0, ypoint - yradius), min(yres, ypoint + yradius + 1)
        xmin, xmax = max(0, xpoint - xradius), min(xres, xpoint + xradius + 1)

        # get the valid location around this point
        window = segmentation[zmin:zmax, ymin:ymax, xmin:xmax].copy()
        segment = ExtractExample(window, label_one, label_two)

        if segment.shape == window_shape:
            # nothing was clipped, use the extracted region directly
            example = segment
        else:
            # paste the clipped region into a zero volume of the full size;
            # the offset is only non-zero when the lower bound was clipped
            example = np.zeros(window_shape, dtype=np.int32)
            zstart = zradius - zpoint if zmin == 0 else 0
            ystart = yradius - ypoint if ymin == 0 else 0
            xstart = xradius - xpoint if xmin == 0 else 0

            example[zstart:zstart + segment.shape[IB_Z],
                    ystart:ystart + segment.shape[IB_Y],
                    xstart:xstart + segment.shape[IB_X]] = segment

        # scale the feature to the appropriate width
        examples_array[index, :, :, :] = ScaleFeature(example, width,
                                                      label_one, label_two)

    return examples_array
Exemplo n.º 3
0
def SaveFeatures(prefix_one, prefix_two, threshold, maximum_distance):
    """Write one compressed feature file per candidate between two volumes.

    For every candidate returned by FindCandidates an unrotated feature is
    extracted and collapsed into a single uint8 volume: 1 where only the
    first channel is set, 2 where the second channel is set, and 3 where
    both are set.  The result is saved under 'features/ebro/'.
    """
    # read in both segmentation and image files
    segmentation_one = dataIO.ReadSegmentationData(prefix_one)
    segmentation_two = dataIO.ReadSegmentationData(prefix_two)
    segmentations = (segmentation_one, segmentation_two)
    assert (segmentation_one.shape == segmentation_two.shape)

    image_one = dataIO.ReadImageData(prefix_one)
    image_two = dataIO.ReadImageData(prefix_two)
    images = (image_one, image_two)
    assert (image_one.shape == image_two.shape)

    bboxes = (dataIO.GetWorldBBox(prefix_one), dataIO.GetWorldBBox(prefix_two))

    # both volumes must share the same resolution
    world_res = dataIO.Resolution(prefix_one)
    assert (world_res == dataIO.Resolution(prefix_two))

    # get the radii for this feature
    radii = (maximum_distance / world_res[IB_Z],
             maximum_distance / world_res[IB_Y],
             maximum_distance / world_res[IB_X])
    # the feature spans twice the radius on every axis and has three channels
    width = (2 * radii[IB_Z], 2 * radii[IB_Y], 2 * radii[IB_X], 3)

    # get all of the candidates for these prefixes
    candidates = FindCandidates(prefix_one, prefix_two, threshold,
                                maximum_distance, True)
    ncandidates = len(candidates)

    output_pattern = 'features/ebro/{}-{}/{}-{}nm-{:05d}.h5'

    for iv, candidate in enumerate(candidates):
        # get the example with zero rotation
        example = ExtractFeature(segmentations, images, bboxes, candidate,
                                 width, radii, 0)

        # compress the channels into a single labeled volume
        compressed_output = np.zeros((width[IB_Z], width[IB_Y], width[IB_X]),
                                     dtype=np.uint8)
        first_present = example[0, :, :, :, 0] == 1
        second_present = example[0, :, :, :, 1] == 1
        compressed_output[first_present] = 1
        compressed_output[second_present] = 2
        # both candidates are present at this location
        compressed_output[np.logical_and(first_present, second_present)] = 3

        # save the output file
        filename = output_pattern.format(prefix_one, prefix_two, threshold,
                                         maximum_distance, iv)
        dataIO.WriteH5File(compressed_output, filename, 'main')
Exemplo n.º 4
0
def SkeletonCandidateGenerator(prefix, network_distance, positive_candidates,
                               negative_candidates, parameters, width):
    """Endlessly yield balanced (examples, labels) batches.

    Every batch interleaves positive and negative candidates: even rows hold
    positives, odd rows hold negatives.  Each candidate list is shuffled in
    place up front and again whenever it has been fully consumed.

    Args:
        prefix: dataset identifier handed to the dataIO readers.
        network_distance: divided by the per-axis resolution to get the radii.
        positive_candidates: list of candidates; shuffled in place.
        negative_candidates: list of candidates; shuffled in place.
        parameters: mapping; only 'batch_size' is read here.
        width: width[0] is the channel count, width[IB_Z + 1], width[IB_Y + 1]
            and width[IB_X + 1] are the spatial extents of an example.

    Yields:
        (examples, labels) where examples is float32 with shape
        (batch_size, nchannels, z, y, x) and labels is float32 with shape
        (batch_size,).  With an odd batch_size the last row is left at zero.
    """
    # get the number of channels for the data
    nchannels = width[0]
    npositive_candidates = len(positive_candidates)
    nnegative_candidates = len(negative_candidates)

    # read in all relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    world_res = dataIO.Resolution(prefix)

    # get the radii for the relevant region
    radii = (network_distance / world_res[IB_Z],
             network_distance / world_res[IB_Y],
             network_distance / world_res[IB_X])

    batch_size = parameters['batch_size']
    batch_shape = (batch_size, nchannels, width[IB_Z + 1], width[IB_Y + 1],
                   width[IB_X + 1])

    random.shuffle(positive_candidates)
    random.shuffle(negative_candidates)

    positive_index = 0
    negative_index = 0

    while True:
        # allocate fresh buffers for every batch: a consumer that queues the
        # yielded arrays would otherwise see them overwritten by later batches
        examples = np.zeros(batch_shape, dtype=np.float32)
        labels = np.zeros(batch_size, dtype=np.float32)

        # half of the batch is positive and half is negative
        for iv in range(batch_size // 2):
            positive_candidate = positive_candidates[positive_index]
            negative_candidate = negative_candidates[negative_index]

            examples[2 * iv, :, :, :, :] = ExtractFeature(
                segmentation, positive_candidate, width, radii)
            labels[2 * iv] = positive_candidate.ground_truth
            examples[2 * iv + 1, :, :, :, :] = ExtractFeature(
                segmentation, negative_candidate, width, radii)
            labels[2 * iv + 1] = negative_candidate.ground_truth

            # reshuffle and restart a list once it is exhausted
            positive_index += 1
            if positive_index == npositive_candidates:
                random.shuffle(positive_candidates)
                positive_index = 0
            negative_index += 1
            if negative_index == nnegative_candidates:
                random.shuffle(negative_candidates)
                negative_index = 0

        yield (examples, labels)
Exemplo n.º 5
0
def EbroCandidateGenerator(prefix_one, prefix_two, maximum_distance, candidates, width):
    """Endlessly yield the unrotated feature of each candidate in turn.

    The segmentations, images, world bounding boxes and resolution of both
    prefixes are loaded once; the shapes and the resolutions of the two
    volumes are asserted to agree.  After the last candidate the generator
    starts again from the first one.
    """
    # read in all relevant information
    segmentation_one = dataIO.ReadSegmentationData(prefix_one)
    segmentation_two = dataIO.ReadSegmentationData(prefix_two)
    segmentations = (segmentation_one, segmentation_two)
    assert (segmentation_one.shape == segmentation_two.shape)

    image_one = dataIO.ReadImageData(prefix_one)
    image_two = dataIO.ReadImageData(prefix_two)
    images = (image_one, image_two)
    assert (image_one.shape == image_two.shape)

    bboxes = (dataIO.GetWorldBBox(prefix_one), dataIO.GetWorldBBox(prefix_two))

    world_res = dataIO.Resolution(prefix_one)
    assert (world_res == dataIO.Resolution(prefix_two))

    # get the radii for the relevant region
    radii = (maximum_distance / world_res[IB_Z],
             maximum_distance / world_res[IB_Y],
             maximum_distance / world_res[IB_X])

    cursor = 0
    start_time = time.time()
    while True:
        # wrap around once every candidate has been served
        if cursor >= len(candidates):
            cursor = 0

        current = candidates[cursor]
        cursor += 1

        # rotation equals 0
        yield ExtractFeature(segmentations, images, bboxes, current, width, radii, 0)
Exemplo n.º 6
0
def NuclearCandidateGenerator(prefix, network_distance, candidates, parameters,
                              width):
    """Endlessly yield (examples, labels) batches over all candidates.

    Every candidate is visited once per rotation (16 rotations when
    parameters['augment'] is true, otherwise 1).  The running index restarts
    at zero after all candidate/rotation pairs were used and at the start of
    every pass of ``nbatches`` batches.

    Args:
        prefix: dataset identifier handed to the dataIO readers.
        network_distance: divided by the per-axis resolution to get the radii.
        candidates: sized, indexable collection; each element exposes a
            ``ground_truth`` attribute.
        parameters: mapping; 'augment' and 'batch_size' are read here.
        width: width[0] is the channel count, width[IB_Z + 1], width[IB_Y + 1]
            and width[IB_X + 1] are the spatial extents of an example.

    Yields:
        (examples, labels) with examples of shape
        (batch_size, nchannels, z, y, x) and labels of shape (batch_size,),
        both float32.

    Raises:
        ValueError: if ``candidates`` is empty (raised on first iteration).
    """
    # get the number of channels for the data
    nchannels = width[0]

    # read in all relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    world_res = dataIO.Resolution(prefix)

    # get the radii for the relevant region
    radii = (network_distance / world_res[IB_Z],
             network_distance / world_res[IB_Y],
             network_distance / world_res[IB_X])

    # number of rotations applied to every candidate
    rotations = 16 if parameters['augment'] else 1

    ncandidates = len(candidates)
    if not ncandidates:
        # with no candidates there are zero batches per pass and the loop
        # below would spin forever without ever yielding
        raise ValueError('candidates must not be empty')

    batch_size = parameters['batch_size']

    # number of batches needed to cover every candidate/rotation pair once
    total_examples = rotations * ncandidates
    nbatches = total_examples // batch_size
    if total_examples % batch_size:
        nbatches += 1

    batch_shape = (batch_size, nchannels, width[IB_Z + 1], width[IB_Y + 1],
                   width[IB_X + 1])

    while True:
        index = 0
        for _ in range(nbatches):
            # allocate fresh buffers for every batch: a consumer that queues
            # the yielded arrays would otherwise see them overwritten
            examples = np.zeros(batch_shape, dtype=np.float32)
            labels = np.zeros(batch_size, dtype=np.float32)

            for iv in range(batch_size):
                # get the candidate index and the rotation
                rotation = index // ncandidates
                candidate = candidates[index % ncandidates]

                # get the example and label
                examples[iv, :, :, :, :] = ExtractFeature(
                    segmentation, candidate, width, radii, rotation)
                labels[iv] = candidate.ground_truth

                # provide overflow relief
                index += 1
                if index >= total_examples:
                    index = 0

            yield (examples, labels)
Exemplo n.º 7
0
def GenerateFeatures(prefix):
    """Write the first zx and zy slices of the image for `prefix` as PNGs.

    The image's z resolution is divided by its y and x resolutions and the
    two ratios are asserted to be equal.  The slices image[:, 0, :] and
    image[:, :, 0] are then written to '<prefix>-zx.png' and
    '<prefix>-zy.png', after which the function returns None.

    NOTE(review): everything after the bare `return` below is unreachable.
    It looks like disabled work-in-progress for writing strided
    sub-volumes -- confirm whether it should be restored or removed.
    """
    # find the level of anisotropy (ratio of z resolution to y and x)
    resolution = dataIO.Resolution(prefix)
    zy = resolution[IB_Z] / resolution[IB_Y]
    zx = resolution[IB_Z] / resolution[IB_X]

    # assert isotropy in xy
    assert (zy == zx)

    # read the image
    image = dataIO.ReadImageData(prefix)
    zres, yres, xres = image.shape

    # take the first slice along the second and the third axis
    zx_slice = image[:,0,:]
    zy_slice = image[:,:,0]

    dataIO.WriteImage('{}-zx.png'.format(prefix), zx_slice)
    dataIO.WriteImage('{}-zy.png'.format(prefix), zy_slice)

    
    
    # early exit: none of the code below this line is ever executed
    return
    
    for iv in range(zy):
        # NOTE(review): both cuts stride the last axis; xcut presumably was
        # meant to stride a different axis -- confirm before re-enabling
        xcut = image[:,:,iv::zx]
        ycut = image[:,:,iv::zy]

        xfilename = 'super_resolution/{}-x-{}.h5'.format(prefix, iv + 1)
        yfilename = 'super_resolution/{}-y-{}.h5'.format(prefix, iv + 1)
        
        dataIO.WriteH5File(xcut, xfilename, 'main')
        dataIO.WriteH5File(ycut, yfilename, 'main')

        # write every z slice of the cuts and of the original image
        for iz in range(zres):
            xslice = xcut[iz,:,:]
            yslice = ycut[iz,:,:]
            zslice = image[iz,:,:]
            
            xfilename = 'super_resolution/images/{}-x-{}-{:04d}.png'.format(prefix, iv + 1, iz)
            yfilename = 'super_resolution/images/{}-y-{}-{:04d}.png'.format(prefix, iv + 1, iz)
            zfilename = 'super_resolution/images/{}-z-{:04d}.png'.format(prefix, iz)
            
            dataIO.WriteImage(xfilename, xslice)
            dataIO.WriteImage(yfilename, yslice)
            dataIO.WriteImage(zfilename, zslice)
            
        # only the first offset (iv == 0) would be processed
        break
Exemplo n.º 8
0
def SaveFeatures(prefix, threshold, maximum_distance, network_distance):
    """Write one compressed feature file per skeleton candidate.

    Candidates come from FindCandidates in inference mode.  For each one an
    unrotated feature is extracted and collapsed into a uint8 volume that is
    1 where the first channel is set and 2 where the second channel is set
    (the second channel wins where both are set).  Files are written below
    'features/skeleton/<prefix>/', which is created when missing.
    """
    # make sure the folder for this model prefix exists
    output_folder = 'features/skeleton/{}'.format(prefix)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # read in relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    grid_size = segmentation.shape
    world_res = dataIO.Resolution(prefix)

    # get the radii for the bounding box
    radii = (maximum_distance / world_res[IB_Z],
             maximum_distance / world_res[IB_Y],
             maximum_distance / world_res[IB_X])
    # the feature spans twice the radius on every axis and has three channels
    width = (2 * radii[IB_Z], 2 * radii[IB_Y], 2 * radii[IB_X], 3)

    # read all candidates
    candidates = FindCandidates(prefix, threshold, maximum_distance,
                                network_distance, inference=True)
    ncandidates = len(candidates)

    output_pattern = 'features/skeleton/{}/{}-{}nm-{}nm-{:05d}.h5'

    for iv, candidate in enumerate(candidates):
        # get an example with zero rotation
        example = ExtractFeature(segmentation, candidate, width, radii, 0)

        # compress the channels into a single labeled volume
        compressed_output = np.zeros((width[IB_Z], width[IB_Y], width[IB_X]),
                                     dtype=np.uint8)
        first_present = example[0, :, :, :, 0] == 1
        second_present = example[0, :, :, :, 1] == 1
        compressed_output[first_present] = 1
        compressed_output[second_present] = 2

        # save the output file
        filename = output_pattern.format(prefix, threshold, maximum_distance,
                                         network_distance, iv)
        dataIO.WriteH5File(compressed_output, filename, 'main')
Exemplo n.º 9
0
def EndpointTraversal(prefix, segmentation, seg2gold_mapping,
                      maximum_distance):
    """Collect candidate edges by traversing from every skeleton endpoint.

    Every endpoint with a non-null direction vector is handed to
    TraverseIndividualEndpoint; each neighbor it reports becomes one edge.
    ``seg2gold_mapping`` is accepted but not used by this function.

    Returns:
        List of (zpoint, ypoint, xpoint, label, neighbor_label, ie) tuples,
        where ``ie`` is the index of the endpoint within its skeleton.
    """
    # get the resolution for this data
    resolution = dataIO.Resolution(prefix)

    # one past the largest label in the segmentation
    max_label = np.amax(segmentation) + 1

    # read in all of the skeletons
    skeletons = dataIO.ReadSkeletons(prefix)

    edges = []

    for skeleton in skeletons:
        label = skeleton.label

        for ie, endpoint in enumerate(skeleton.endpoints):
            center = (endpoint.iz, endpoint.iy, endpoint.ix)
            vector = endpoint.vector

            # do not consider null vectors (the sums are all 0 or 1)
            squared_length = (vector[IB_Z] * vector[IB_Z] +
                              vector[IB_Y] * vector[IB_Y] +
                              vector[IB_X] * vector[IB_X])
            if squared_length < 0.5:
                continue

            neighbors, means = TraverseIndividualEndpoint(
                segmentation, center, vector, resolution, max_label,
                maximum_distance)

            # one edge per neighbor, located at the matching mean point
            for position, neighbor_label in enumerate(neighbors):
                zpoint, ypoint, xpoint = means[position]
                edge = (zpoint, ypoint, xpoint, label, neighbor_label, ie)
                edges.append(edge)

    return edges
Exemplo n.º 10
0
def Train(prefix_one, prefix_two, model_prefix, threshold, maximum_distance, width, parameters):
    """Build a 3D convolutional network, train it on candidates, and save it.

    Args:
        prefix_one: first dataset identifier handed to the dataIO readers.
        prefix_two: second dataset identifier; its segmentation/image shapes
            and resolution are asserted to match those of ``prefix_one``.
        model_prefix: path prefix for every file written by this function
            ('<prefix>-<epoch>.json', '<prefix>-<epoch>.h5', '<prefix>.json',
            '<prefix>.h5').  Its directory part is created when missing.
        threshold: forwarded to FindCandidates.
        maximum_distance: forwarded to FindCandidates and divided by the
            per-axis resolution to obtain the feature radii.
        width: indexed with IB_Z, IB_Y, IB_X for the spatial extents;
            width[3] is the number of channels.
        parameters: mapping providing 'starting_epoch', 'iterations',
            'batch_size', 'initial_learning_rate', 'decay_rate',
            'activation', 'double_conv', 'normalization', 'optimizer'
            ('adam' or 'sgd'), 'weights' (passed to fit as class_weight) and
            'augment'.

    Raises:
        ValueError: if parameters['optimizer'] is neither 'adam' nor 'sgd'.
    """
    # identify convenient variables
    nchannels = width[3]
    starting_epoch = parameters['starting_epoch']
    iterations = parameters['iterations']
    batch_size = parameters['batch_size']
    initial_learning_rate = parameters['initial_learning_rate']
    decay_rate = parameters['decay_rate']

    # architecture parameters
    activation = parameters['activation']
    double_conv = parameters['double_conv']
    normalization = parameters['normalization']
    optimizer = parameters['optimizer']
    weights = parameters['weights']

    # create the model
    model = Sequential()

    # add all layers to the model
    AddConvolutionalLayer(model, 16, (3, 3, 3), 'valid', activation, normalization, width)
    if double_conv: AddConvolutionalLayer(model, 16, (3, 3, 3), 'valid', activation, normalization)
    AddPoolingLayer(model, (1, 2, 2), 0.0, normalization)

    AddConvolutionalLayer(model, 32, (3, 3, 3), 'valid', activation, normalization)
    if double_conv: AddConvolutionalLayer(model, 32, (3, 3, 3), 'valid', activation, normalization)
    AddPoolingLayer(model, (1, 2, 2), 0.0, normalization)

    AddConvolutionalLayer(model, 64, (3, 3, 3), 'valid', activation, normalization)
    if double_conv: AddConvolutionalLayer(model, 64, (3, 3, 3), 'valid', activation, normalization)
    AddPoolingLayer(model, (2, 2, 2), 0.0, normalization)

    AddConvolutionalLayer(model, 128, (3, 3, 3), 'valid', activation, normalization)
    if double_conv: AddConvolutionalLayer(model, 128, (3, 3, 3), 'valid', activation, normalization)
    AddPoolingLayer(model, (2, 2, 2), 0.0, normalization)

    AddFlattenLayer(model)
    AddDenseLayer(model, 512, 0.0, activation, normalization)
    AddDenseLayer(model, 1, 0.0, 'sigmoid', False)

    # compile the model
    if optimizer == 'adam':
        opt = Adam(lr=initial_learning_rate, decay=decay_rate, beta_1=0.99, beta_2=0.999, epsilon=1e-08)
    elif optimizer == 'sgd':
        opt = SGD(lr=initial_learning_rate, decay=decay_rate, momentum=0.9, nesterov=True)
    else:
        # fail with a clear message instead of an unbound local variable
        raise ValueError('unsupported optimizer: {}'.format(optimizer))
    model.compile(loss='mean_squared_error', optimizer=opt)

    # make sure the folder for the model prefix exists; a prefix without a
    # directory part lives in the current directory and needs no folder
    output_folder = os.path.dirname(model_prefix)
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # write out the network parameters to a file
    WriteLogfiles(model, model_prefix, parameters)

    # read in all relevant information
    segmentations = (dataIO.ReadSegmentationData(prefix_one), dataIO.ReadSegmentationData(prefix_two))
    assert (segmentations[0].shape == segmentations[1].shape)
    images = (dataIO.ReadImageData(prefix_one), dataIO.ReadImageData(prefix_two))
    assert (images[0].shape == images[1].shape)
    bboxes = (dataIO.GetWorldBBox(prefix_one), dataIO.GetWorldBBox(prefix_two))
    world_res = dataIO.Resolution(prefix_one)
    assert (world_res == dataIO.Resolution(prefix_two))

    # get the radii for the relevant region
    radii = (maximum_distance / world_res[IB_Z], maximum_distance / world_res[IB_Y], maximum_distance / world_res[IB_X])

    # get the candidate between these two prefixes
    candidates = FindCandidates(prefix_one, prefix_two, threshold, maximum_distance, inference=False)
    ncandidates = len(candidates)

    # every candidate is used once per rotation
    if parameters['augment']: rotations = 16
    else: rotations = 1
    total_examples = rotations * ncandidates

    # determine the total number of epochs (one epoch is one batch here)
    nepochs = iterations * total_examples // batch_size
    if total_examples % batch_size:
        nepochs += 1

    # need to adjust learning rate and load in existing weights
    if starting_epoch == 1:
        index = 0
    else:
        nexamples = starting_epoch * batch_size
        current_learning_rate = initial_learning_rate / (1.0 + nexamples * decay_rate)
        backend.set_value(model.optimizer.lr, current_learning_rate)

        # resume at the candidate/rotation pair where the earlier run stopped
        index = (starting_epoch * batch_size) % total_examples

        model.load_weights('{}-{}.h5'.format(model_prefix, starting_epoch))

    # checkpoint roughly every 1000 examples; clamp to one so that batch
    # sizes above 1000 do not cause a modulo by zero
    save_interval = max(1, 1000 // batch_size)

    # iterate for every epoch
    start_time = time.time()
    for epoch in range(starting_epoch, nepochs + 1):
        # print statistics
        if not epoch % 20:
            print('{}/{} in {:4f} seconds'.format(epoch, nepochs, time.time() - start_time))
            start_time = time.time()

        # create arrays for examples and labels
        examples = np.zeros((batch_size, width[IB_Z], width[IB_Y], width[IB_X], nchannels), dtype=np.uint8)
        labels = np.zeros((batch_size, 1), dtype=np.uint8)

        for iv in range(batch_size):
            # get the index and the rotation
            rotation = index // ncandidates
            candidate = candidates[index % ncandidates]

            # get the example and label
            examples[iv,:,:,:,:] = ExtractFeature(segmentations, images, bboxes, candidate, width, radii, rotation)
            labels[iv,:] = candidate.ground_truth

            # provide overflow relief
            index += 1
            if index >= total_examples: index = 0

        # fit the model
        model.fit(examples, labels, epochs=1, verbose=0, class_weight=weights)

        # save a checkpoint of the architecture and the weights
        if not epoch % save_interval:
            json_string = model.to_json()
            with open('{}-{}.json'.format(model_prefix, epoch), 'w') as fd:
                fd.write(json_string)
            model.save_weights('{}-{}.h5'.format(model_prefix, epoch))

        # update the learning rate
        nexamples = epoch * batch_size
        current_learning_rate = initial_learning_rate / (1.0 + nexamples * decay_rate)
        backend.set_value(model.optimizer.lr, current_learning_rate)

    # save the fully trained model
    json_string = model.to_json()
    with open('{}.json'.format(model_prefix), 'w') as fd:
        fd.write(json_string)
    model.save_weights('{}.h5'.format(model_prefix))
Exemplo n.º 11
0
def FindEndpointMatches(prefix, algorithm, params, resolution, ground_truth):
    """Match proposed skeleton endpoints to ground truth and score them.

    For every label the proposed and ground truth endpoints are scaled by the
    dataset resolution, matched one-to-one with a minimum-cost assignment on
    their pairwise Euclidean distances, and a pair is accepted when its
    distance does not exceed 800.  The accepted pairs are written in binary
    form to 'benchmarks/skeleton/matchings/'.

    Args:
        prefix: dataset identifier; also used to look up the resolution that
            scales the endpoint coordinates.
        algorithm: name used in the input and output filenames.
        params: parameter string used in the input and output filenames.
        resolution: indexed with IB_X, IB_Y, IB_Z; only used to build the
            input and output filenames.
        ground_truth: per-label sequences of (x, y, z) endpoints.

    Returns:
        (fscore, precision, recall).  (0, 0, 0) is returned when the skeleton
        file does not exist; a score whose denominator is zero is 0.0.
    """
    # read the endpoints for this set of parameters
    skeleton_filename = 'benchmarks/skeleton/{}-{}-{:03d}x{:03d}x{:03d}-upsample-{}-skeleton.pts'.format(
        prefix, algorithm, resolution[IB_X], resolution[IB_Y],
        resolution[IB_Z], params)
    if not os.path.exists(skeleton_filename): return 0, 0, 0

    # read the endpoints
    proposed = ReadSkeletonEndpoints(skeleton_filename)
    assert (len(ground_truth) == len(proposed))

    # don't allow points to be connected over this distance
    max_distance = 800

    # go through every label
    max_label = len(ground_truth)

    output_filename = 'benchmarks/skeleton/matchings/{}-{}-{:03d}x{:03d}x{:03d}-{}-matching-pairs.pts'.format(
        prefix, algorithm, resolution[IB_X], resolution[IB_Y],
        resolution[IB_Z], params)

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    with open(output_filename, 'wb') as fd:
        # the dataset resolution converts the points to world coordinates;
        # kept separate from the `resolution` argument used for filenames
        world_res = dataIO.Resolution(prefix)

        fd.write(struct.pack('q', max_label))
        for label in range(max_label):
            # no ground truth for this label
            if not len(ground_truth[label]):
                fd.write(struct.pack('q', 0))
                continue

            ngt_pts = len(ground_truth[label])
            npr_pts = len(proposed[label])

            gt_pts = np.zeros((ngt_pts, 3), dtype=np.int64)
            pr_pts = np.zeros((npr_pts, 3), dtype=np.int64)

            # can not use IB_NDIMS because coordinates are (x, y, z) here
            for pt, point in enumerate(ground_truth[label]):
                gt_pts[pt, 0] = world_res[IB_X] * point[0]
                gt_pts[pt, 1] = world_res[IB_Y] * point[1]
                gt_pts[pt, 2] = world_res[IB_Z] * point[2]

            for pt, point in enumerate(proposed[label]):
                pr_pts[pt, 0] = world_res[IB_X] * point[0]
                pr_pts[pt, 1] = world_res[IB_Y] * point[1]
                pr_pts[pt, 2] = world_res[IB_Z] * point[2]

            # minimum-cost one-to-one assignment on pairwise distances
            cost_matrix = scipy.spatial.distance.cdist(gt_pts, pr_pts)
            matching = scipy.optimize.linear_sum_assignment(cost_matrix)

            valid_matches = set()
            for match in zip(matching[0], matching[1]):
                # valid pairs must be within max_distance (in nanometers)
                if cost_matrix[match[0], match[1]] > max_distance: continue

                valid_matches.add(match)

            true_positives += len(valid_matches)
            false_positives += npr_pts - len(valid_matches)
            false_negatives += ngt_pts - len(valid_matches)

            # write the ground truth and the corresponding segment endpoints
            fd.write(struct.pack('q', len(valid_matches)))
            for match in valid_matches:
                fd.write(struct.pack('qq', match[0], match[1]))

    # guard every ratio: with no proposed points, no ground truth points or
    # no matches at all the denominators below are zero
    npredicted = true_positives + false_positives
    nactual = true_positives + false_negatives

    precision = true_positives / float(npredicted) if npredicted else 0.0
    recall = true_positives / float(nactual) if nactual else 0.0

    if precision + recall:
        fscore = 2 * (precision * recall) / float(precision + recall)
    else:
        fscore = 0.0

    return fscore, precision, recall
Exemplo n.º 12
0
def Forward(prefix,
            model_prefix,
            segmentation,
            width,
            radius,
            subset,
            evaluate=False,
            threshold_volume=10368000):
    """Run a trained model to merge small segments into large neighbors.

    The model is loaded from '<model_prefix>.json' and
    '<model_prefix>-best-loss.h5' and predicts a probability for every
    large/small pairing.  Each small segment is mapped to its most probable
    large neighbor when that probability is at least 0.5.  The relabeled
    segmentation, the probabilities, the inference statistics and the
    end-to-end label mapping are written to disk.

    Args:
        prefix: dataset identifier handed to the data readers.
        model_prefix: path prefix of the trained model; its second
            '/'-separated component is used as the model name in outputs.
        segmentation: label volume; passed to seg2seg.MapLabels and then
            written out (presumably relabeled in place -- confirm against
            seg2seg.MapLabels).
        width: forwarded to CollectExamples, CollectLargeSmallPairs and
            NodeGenerator.
        radius: forwarded to CollectExamples and CollectLargeSmallPairs.
        subset: forwarded to CollectExamples and CollectLargeSmallPairs.
        evaluate: when true, compare the result against the gold data and
            write rand error / variation of information to 'node-results/'.
        threshold_volume: divided by the volume of one voxel to obtain the
            voxel-count threshold handed to FindSmallSegments.
    """
    # read in the trained model (close the description file right away)
    with open('{}.json'.format(model_prefix), 'r') as fd:
        model = model_from_json(fd.read())
    model.load_weights('{}-best-loss.h5'.format(model_prefix))

    # get all of the examples
    examples, npositives, nnegatives = CollectExamples(prefix, width, radius,
                                                       subset)

    # get all of the large-small pairings
    pairings = CollectLargeSmallPairs(prefix, width, radius, subset)
    #assert (len(pairings) == examples.shape[0])

    # get the threshold in terms of number of voxels
    resolution = dataIO.Resolution(prefix)
    threshold = int(threshold_volume /
                    (resolution[IB_Z] * resolution[IB_Y] * resolution[IB_X]))

    # get the list of nodes under the threshold
    small_segments, _ = FindSmallSegments(segmentation, threshold)

    # get all of the probabilities
    probabilities = model.predict_generator(NodeGenerator(examples, width),
                                            examples.shape[0],
                                            max_q_size=1000)

    # save the probabilities to a file
    output_filename = '{}-{}.probabilities'.format(model_prefix, prefix)
    with open(output_filename, 'wb') as fd:
        fd.write(struct.pack('q', examples.shape[0]))
        for ie, (label_one, label_two) in enumerate(pairings):
            fd.write(
                struct.pack('qqd', label_one, label_two, probabilities[ie]))

    # create the correct labels for the ground truth: the labeled examples
    # start with the positives, followed by the negatives
    # (the builtin bool replaces the np.bool alias removed from NumPy)
    ground_truth = np.zeros(npositives + nnegatives, dtype=bool)
    ground_truth[:npositives] = True

    # get the results with labeled data
    predictions = Prob2Pred(np.squeeze(probabilities[:npositives +
                                                     nnegatives]))

    # print the confusion matrix
    output_filename = '{}-{}-inference.txt'.format(model_prefix, prefix)
    PrecisionAndRecall(ground_truth, predictions, output_filename)

    # for every small segment, the (large segment, probability) candidates
    small_segment_predictions = dict()
    for small_segment in small_segments:
        small_segment_predictions[small_segment] = set()

    # go through each pairing
    for pairing, probability in zip(pairings, probabilities):
        label_one, label_two = pairing
        # make sure that either label one or two is small and the other is large
        assert ((label_one in small_segments) ^ (label_two in small_segments))

        if label_one in small_segments:
            small_segment = label_one
            large_segment = label_two
        else:
            small_segment = label_two
            large_segment = label_one

        small_segment_predictions[small_segment].add(
            (large_segment, probability[0]))

    # begin to map the small labels (every label initially maps to itself)
    max_label = np.amax(segmentation) + 1
    mapping = list(range(max_label))

    # look at seg2gold to see how many correct segments are merged
    seg2gold_mapping = seg2gold.Mapping(prefix)

    ncorrect_merges = 0
    nincorrect_merges = 0

    # go through all of the small segments
    for small_segment in small_segments:
        best_probability = -1
        best_large_segment = -1

        # go through all the neighboring large segments
        for large_segment, probability in small_segment_predictions[
                small_segment]:
            if probability > best_probability:
                best_probability = probability
                best_large_segment = large_segment

        # no neighbor or no confident prediction: leave the segment alone
        if best_large_segment == -1 or best_probability < 0.5:
            mapping[small_segment] = small_segment
            continue

        # merge into the most probable large segment
        mapping[small_segment] = best_large_segment

        # don't consider undetermined locations
        if seg2gold_mapping[small_segment] < 1 or seg2gold_mapping[
                best_large_segment] < 1:
            continue

        if seg2gold_mapping[small_segment] == seg2gold_mapping[
                best_large_segment]:
            ncorrect_merges += 1
        else:
            nincorrect_merges += 1

    print('\nResults:')
    print('  Correctly Merged: {}'.format(ncorrect_merges))
    print('  Incorrectly Merged: {}'.format(nincorrect_merges))

    # append the merge statistics to the inference report
    with open(output_filename, 'a') as fd:
        fd.write('\nResults:\n')
        fd.write('  Correctly Merged: {}\n'.format(ncorrect_merges))
        fd.write('  Incorrectly Merged: {}\n'.format(nincorrect_merges))

    # save the node mapping in the cache for later
    end2end_mapping = [mapping[iv] for iv in range(max_label)]

    # initiate the mapping to eliminate small segments
    seg2seg.MapLabels(segmentation, mapping)

    # reduce the labels and map again
    mapping, _ = seg2seg.ReduceLabels(segmentation)
    seg2seg.MapLabels(segmentation, mapping)

    # update the end to end mapping with the reduced labels
    for iv in range(max_label):
        end2end_mapping[iv] = mapping[end2end_mapping[iv]]

    # get the model name (first component is architecture and third is node-)
    model_name = model_prefix.split('/')[1]
    output_filename = 'rhoana/{}-reduced-{}.h5'.format(prefix, model_name)
    dataIO.WriteH5File(segmentation, output_filename, 'main')

    # spawn a new meta file
    dataIO.SpawnMetaFile(prefix, output_filename, 'main')

    # save the end to end mapping in the cache
    mapping_filename = 'cache/{}-reduced-{}-end2end.map'.format(
        prefix, model_name)
    with open(mapping_filename, 'wb') as fd:
        fd.write(struct.pack('q', max_label))
        for label in range(max_label):
            fd.write(struct.pack('q', end2end_mapping[label]))

    if evaluate:
        gold = dataIO.ReadGoldData(prefix)

        # run the evaluation framework
        rand_error, vi = comparestacks.VariationOfInformation(
            segmentation, gold)

        # write the output file
        with open('node-results/{}-reduced-{}.txt'.format(prefix, model_name),
                  'w') as fd:
            fd.write('Rand Error Full: {}\n'.format(rand_error[0] +
                                                    rand_error[1]))
            fd.write('Rand Error Merge: {}\n'.format(rand_error[0]))
            fd.write('Rand Error Split: {}\n'.format(rand_error[1]))

            fd.write('Variation of Information Full: {}\n'.format(vi[0] +
                                                                  vi[1]))
            fd.write('Variation of Information Merge: {}\n'.format(vi[0]))
            fd.write('Variation of Information Split: {}\n'.format(vi[1]))
Exemplo n.º 13
0
def GenerateFeatures(prefix, threshold, network_distance):
    """Collect skeleton joints as nuclear split candidates and save them.

    A joint becomes a NuclearCandidate when its gold label is non-zero, it has
    more than two neighbors, and a box of the network radius around it passes
    the bounds check against the volume.  The candidate is positive when at
    least one neighbor lies on a different non-zero gold label, and negative
    otherwise.  Candidates are written through SaveCandidates to the training,
    validation and inference files under features/nuclear/, and summary counts
    are printed.

    Args:
        prefix: dataset identifier handed to the dataIO readers.
        threshold: forwarded to seg2seg.RemoveSmallConnectedComponents.
        network_distance: box radius, in the units of dataIO.Resolution (the
            output filenames label it "nm"); divided by the per-axis
            resolution to obtain a radius in voxels.
    """
    # read in the relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    gold = dataIO.ReadGoldData(prefix)
    assert (segmentation.shape == gold.shape)

    # NOTE(review): the returned mapping is never read in this function; the
    # call is kept only in case seg2gold.Mapping has side effects - confirm
    # and drop it if it is pure
    seg2gold.Mapping(segmentation,
                     gold,
                     low_threshold=0.10,
                     high_threshold=0.80)

    # remove small connected components
    segmentation = seg2seg.RemoveSmallConnectedComponents(
        segmentation, threshold=threshold).astype(np.int64)

    # get the grid size and the world resolution
    grid_size = segmentation.shape
    world_res = dataIO.Resolution(prefix)

    # get the radius in grid coordinates
    network_radii = np.int64((network_distance / world_res[IB_Z],
                              network_distance / world_res[IB_Y],
                              network_distance / world_res[IB_X]))

    # get all of the skeletons
    skeletons, _, _ = dataIO.ReadSkeletons(prefix, segmentation)

    positive_candidates = []
    negative_candidates = []

    # iterate over all joints of all skeletons
    for skeleton in skeletons:
        label = skeleton.label

        for joint in skeleton.joints:
            # get the gold value at this location
            location = joint.GridPoint()
            gold_label = gold[location[IB_Z], location[IB_Y], location[IB_X]]

            # make sure the bounding box fits
            fits = all(
                location[dim] - network_radii[dim] >= 0
                and location[dim] + network_radii[dim] <= grid_size[dim]
                for dim in range(NDIMS))
            if not fits: continue

            # skip joints that have no gold label
            if not gold_label: continue

            # only joints with more than two neighbors are considered
            neighbors = joint.Neighbors()
            if len(neighbors) <= 2: continue

            # split when any neighbor lies on a different, non-zero gold label
            # (gold_label itself is known to be non-zero at this point)
            should_split = False
            for neighbor in neighbors:
                neighbor_location = neighbor.GridPoint()
                neighbor_gold_label = gold[neighbor_location[IB_Z],
                                           neighbor_location[IB_Y],
                                           neighbor_location[IB_X]]

                if neighbor_gold_label and neighbor_gold_label != gold_label:
                    should_split = True

            candidate = NuclearCandidate(label, location, should_split)
            if should_split: positive_candidates.append(candidate)
            else: negative_candidates.append(candidate)

    # all three files are built from the same candidate lists; the flags tell
    # SaveCandidates which variant to write
    filename_template = 'features/nuclear/{}-{}-{}nm-{}.candidates'
    train_filename = filename_template.format(prefix, threshold,
                                              network_distance, 'training')
    validation_filename = filename_template.format(prefix, threshold,
                                                   network_distance,
                                                   'validation')
    forward_filename = filename_template.format(prefix, threshold,
                                                network_distance, 'inference')

    SaveCandidates(train_filename,
                   positive_candidates,
                   negative_candidates,
                   inference=False,
                   validation=False)
    SaveCandidates(validation_filename,
                   positive_candidates,
                   negative_candidates,
                   inference=False,
                   validation=True)
    SaveCandidates(forward_filename,
                   positive_candidates,
                   negative_candidates,
                   inference=True)

    # a single parenthesized argument prints the same text as the Python 2
    # print statement
    print('  Positive Candidates: {}'.format(len(positive_candidates)))
    print('  Negative Candidates: {}'.format(len(negative_candidates)))

    # the ratio is undefined without positives; do not crash after the
    # candidate files have already been written
    if positive_candidates:
        print('  Ratio: {}'.format(
            len(negative_candidates) / float(len(positive_candidates))))
    else:
        print('  Ratio: undefined (no positive candidates)')
Exemplo n.º 14
0
def GenerateFeatures(prefix, threshold, maximum_distance, network_distance,
                     endpoint_distance, topology):
    """Pair nearby skeleton endpoints into merge candidates and save them.

    For each skeleton endpoint, the labels found near it by
    FindNeighboringCandidates are collected.  For every (label, neighbor
    label) pair the closest pair of endpoints within endpoint_distance is
    kept, together with the midpoint between them.  Each pair is then labeled
    from a seg2gold mapping computed on a window around the midpoint:
    undetermined when either label maps to gold 0, positive when both map to
    the same gold label, negative otherwise.  The three lists are written
    through SaveCandidates under features/skeleton/ and their sizes printed.

    Args:
        prefix: dataset identifier handed to the dataIO readers.
        threshold: forwarded to seg2seg.RemoveSmallConnectedComponents.
        maximum_distance: forwarded to FindNeighboringCandidates.
        network_distance: window radius around each midpoint, divided by the
            per-axis resolution to obtain voxels.
        endpoint_distance: endpoint pairs must be closer than
            endpoint_distance + 1 (in the units of WorldPoint) to be kept.
        topology: when truthy, skeletons come from
            dataIO.ReadTopologySkeletons, otherwise from
            dataIO.ReadSWCSkeletons.
    """
    # read in the relevant information
    segmentation = dataIO.ReadSegmentationData(prefix)
    gold = dataIO.ReadGoldData(prefix)
    assert (segmentation.shape == gold.shape)
    zres, yres, xres = segmentation.shape

    # remove small connected components
    thresholded_segmentation = seg2seg.RemoveSmallConnectedComponents(
        segmentation, threshold=threshold).astype(np.int64)
    max_label = np.amax(segmentation) + 1

    # get the world resolution
    world_res = dataIO.Resolution(prefix)

    # get the network radius in grid coordinates
    network_radii = np.int64((network_distance / world_res[IB_Z],
                              network_distance / world_res[IB_Y],
                              network_distance / world_res[IB_X]))

    # get all of the skeletons
    if topology:
        skeletons, endpoints = dataIO.ReadTopologySkeletons(
            prefix, thresholded_segmentation)
    else:
        skeletons, _, endpoints = dataIO.ReadSWCSkeletons(
            prefix, thresholded_segmentation)

    # get the set of neighboring labels considered for every endpoint
    endpoint_candidates = []
    for endpoint in endpoints:
        centroid = endpoint.GridPoint()

        # the set is seeded with 0 before being handed to the helper
        # (presumably so the helper receives a non-empty, typed set - confirm);
        # 0 is extracellular and is filtered out right after
        candidates = set()
        candidates.add(0)
        FindNeighboringCandidates(thresholded_segmentation, centroid,
                                  candidates, maximum_distance, world_res)

        endpoint_candidates.append(
            set(candidate for candidate in candidates if candidate))

    # get a mapping from the labels to indices in skeletons (-1 = no skeleton)
    label_to_index = [-1 for _ in range(max_label)]
    for index, skeleton in enumerate(skeletons):
        label_to_index[skeleton.label] = index

    # closest endpoint pair found so far for every pair of labels
    endpoint_pairs = {}
    smallest_distances = {}
    midpoints = {}

    # every considered pair starts just beyond the allowed endpoint distance
    for ie, endpoint in enumerate(endpoints):
        label = endpoint.label
        for neighbor_label in endpoint_candidates[ie]:
            smallest_distances[(label, neighbor_label)] = endpoint_distance + 1
            smallest_distances[(neighbor_label, label)] = endpoint_distance + 1

    for ie, endpoint in enumerate(endpoints):
        label = endpoint.label

        # go through all currently considered neighbor labels
        for neighbor_label in endpoint_candidates[ie]:
            # a label without a skeleton maps to -1, which would silently
            # index the LAST skeleton and pair this endpoint with endpoints
            # of an unrelated label
            skeleton_index = label_to_index[neighbor_label]
            if skeleton_index < 0: continue

            for neighbor_endpoint in skeletons[skeleton_index].endpoints:
                # get the euclidean distance in world coordinates
                deltas = endpoint.WorldPoint(
                    world_res) - neighbor_endpoint.WorldPoint(world_res)
                distance = math.sqrt(deltas[IB_Z] * deltas[IB_Z] +
                                     deltas[IB_Y] * deltas[IB_Y] +
                                     deltas[IB_X] * deltas[IB_X])

                if distance >= smallest_distances[(label, neighbor_label)]:
                    continue

                midpoint = (endpoint.GridPoint() +
                            neighbor_endpoint.GridPoint()) / 2

                # record the new closest pair in both directions
                smallest_distances[(label, neighbor_label)] = distance
                smallest_distances[(neighbor_label, label)] = distance

                endpoint_pairs[(label, neighbor_label)] = (endpoint,
                                                           neighbor_endpoint)
                endpoint_pairs[(neighbor_label, label)] = (neighbor_endpoint,
                                                           endpoint)

                midpoints[(label, neighbor_label)] = midpoint
                midpoints[(neighbor_label, label)] = midpoint

    # create list of candidates
    positive_candidates = []
    negative_candidates = []
    undetermined_candidates = []

    npairs = len(endpoint_pairs)
    for ie, match in enumerate(endpoint_pairs):
        # a single parenthesized argument prints the same text as the
        # Python 2 print statement
        print('{}/{}'.format(ie, npairs))
        endpoint_one, endpoint_two = endpoint_pairs[match]

        label_one = endpoint_one.label
        label_two = endpoint_two.label

        # pairs are stored in both directions; process each only once
        if label_two > label_one: continue

        # extract a bounding box around this midpoint
        midpoint = midpoints[(label_one, label_two)]
        midz, midy, midx = midpoint

        zmin = max(0, midz - network_radii[IB_Z])
        ymin = max(0, midy - network_radii[IB_Y])
        xmin = max(0, midx - network_radii[IB_X])
        # NOTE(review): these are exclusive slice ends, so clamping to
        # size - 1 always drops the final plane of the volume; clamping to
        # size looks intended - confirm before changing the window
        zmax = min(zres - 1, midz + network_radii[IB_Z] + 1)
        ymax = min(yres - 1, midy + network_radii[IB_Y] + 1)
        xmax = min(xres - 1, midx + network_radii[IB_X] + 1)

        extracted_segmentation = segmentation[zmin:zmax, ymin:ymax, xmin:xmax]
        extracted_gold = gold[zmin:zmax, ymin:ymax, xmin:xmax]

        extracted_seg2gold_mapping = seg2gold.Mapping(extracted_segmentation,
                                                      extracted_gold,
                                                      match_threshold=0.70,
                                                      nonzero_threshold=0.40)

        # labels outside the mapping cannot be looked up; valid indices stop
        # at size - 1, so a label equal to size must be skipped as well
        if label_one >= extracted_seg2gold_mapping.size: continue
        if label_two >= extracted_seg2gold_mapping.size: continue

        gold_one = extracted_seg2gold_mapping[label_one]
        gold_two = extracted_seg2gold_mapping[label_two]

        ground_truth = (gold_one == gold_two)

        candidate = SkeletonCandidate((label_one, label_two), midpoint,
                                      ground_truth)

        if not gold_one or not gold_two:
            undetermined_candidates.append(candidate)
        elif ground_truth:
            positive_candidates.append(candidate)
        else:
            negative_candidates.append(candidate)

    # save positive, negative and undetermined candidates separately
    positive_filename = 'features/skeleton/{}-{}-{}nm-{}nm-{}nm-positive.candidates'.format(
        prefix, threshold, maximum_distance, endpoint_distance,
        network_distance)
    negative_filename = 'features/skeleton/{}-{}-{}nm-{}nm-{}nm-negative.candidates'.format(
        prefix, threshold, maximum_distance, endpoint_distance,
        network_distance)
    undetermined_filename = 'features/skeleton/{}-{}-{}nm-{}nm-{}nm-undetermined.candidates'.format(
        prefix, threshold, maximum_distance, endpoint_distance,
        network_distance)

    SaveCandidates(positive_filename, positive_candidates)
    SaveCandidates(negative_filename, negative_candidates)
    SaveCandidates(undetermined_filename, undetermined_candidates)

    print('Positive candidates: {}'.format(len(positive_candidates)))
    print('Negative candidates: {}'.format(len(negative_candidates)))
    print('Undetermined candidates: {}'.format(len(undetermined_candidates)))
Exemplo n.º 15
0
def GenerateFeatures(prefix_one, prefix_two, threshold, maximum_distance):
    """Find label pairs that overlap between two volumes and save them.

    Both segmentations are cropped to the intersection of their world
    bounding boxes.  Every pair of non-zero labels reported by
    FindOverlapCandidates gets a center (the rounded mean of the coordinate
    sums accumulated by FindCenters) and a triple of counts (occurrences of
    each label in its cropped volume and the overlap count).  The candidates
    are pruned with PruneCandidates and the survivors are written with
    SaveFeatures.

    Args:
        prefix_one: dataset identifier of the first volume.
        prefix_two: dataset identifier of the second volume; must have the
            same segmentation shape and resolution as the first.
        threshold: passed as min_size to
            seg2seg.RemoveSmallConnectedComponents.
        maximum_distance: divided by the per-axis resolution and rounded to
            obtain the radii handed to PruneCandidates.

    Raises:
        ValueError: if the two bounding boxes have identical mins along
            every axis, so no overlap dimension can be determined.
    """
    # read in all relevant information
    segmentation_one = dataIO.ReadSegmentationData(prefix_one)
    segmentation_two = dataIO.ReadSegmentationData(prefix_two)
    assert (segmentation_one.shape == segmentation_two.shape)
    bbox_one = dataIO.GetWorldBBox(prefix_one)
    bbox_two = dataIO.GetWorldBBox(prefix_two)
    world_res = dataIO.Resolution(prefix_one)
    assert (world_res == dataIO.Resolution(prefix_two))

    # find which dimension causes overlap; when several axes differ the last
    # check wins (Z over Y over X).  Determined up front so that a missing
    # overlap fails with a clear error before any expensive work, instead of
    # an unbound-variable error at the PruneCandidates call
    overlap = None
    if not bbox_one.mins[IB_X] == bbox_two.mins[IB_X]: overlap = IB_X
    if not bbox_one.mins[IB_Y] == bbox_two.mins[IB_Y]: overlap = IB_Y
    if not bbox_one.mins[IB_Z] == bbox_two.mins[IB_Z]: overlap = IB_Z
    if overlap is None:
        raise ValueError(
            'bounding boxes of {} and {} have identical mins; cannot determine the overlap dimension'
            .format(prefix_one, prefix_two))

    # get the radii for the relevant region (rounded to the nearest voxel)
    radii = (int(maximum_distance / world_res[IB_Z] + 0.5),
             int(maximum_distance / world_res[IB_Y] + 0.5),
             int(maximum_distance / world_res[IB_X] + 0.5))

    # parse out small segments
    segmentation_one = seg2seg.RemoveSmallConnectedComponents(
        segmentation_one, min_size=threshold)
    segmentation_two = seg2seg.RemoveSmallConnectedComponents(
        segmentation_two, min_size=threshold)

    # get the bounding box for the intersection
    world_box = ib3shapes.IBBox(bbox_one.mins, bbox_one.maxs)
    world_box.Intersection(bbox_two)

    # get the mins and maxs of truncated box in each volume's grid
    mins_one = WorldToGrid(world_box.mins, bbox_one)
    mins_two = WorldToGrid(world_box.mins, bbox_two)
    maxs_one = WorldToGrid(world_box.maxs, bbox_one)
    maxs_two = WorldToGrid(world_box.maxs, bbox_two)

    # get the relevant subsections
    segmentation_one = segmentation_one[mins_one[IB_Z]:maxs_one[IB_Z],
                                        mins_one[IB_Y]:maxs_one[IB_Y],
                                        mins_one[IB_X]:maxs_one[IB_X]]
    segmentation_two = segmentation_two[mins_two[IB_Z]:maxs_two[IB_Z],
                                        mins_two[IB_Y]:maxs_two[IB_Y],
                                        mins_two[IB_X]:maxs_two[IB_X]]

    # create an empty set and add a dummy entry for numba
    # this set represents tuples of labels from GRID_ONE and GRID_TWO
    candidates_set = set()
    candidates_set.add((np.uint64(0), np.uint64(0)))
    FindOverlapCandidates(segmentation_one, segmentation_two, candidates_set)

    # get the forward and reverse label mappings
    forward_mapping_one, reverse_mapping_one = seg2seg.ReduceLabels(
        segmentation_one)
    forward_mapping_two, reverse_mapping_two = seg2seg.ReduceLabels(
        segmentation_two)

    # get the number of unique labels
    nlabels_one = reverse_mapping_one.size
    nlabels_two = reverse_mapping_two.size

    # calculate the center of overlap regions
    sums = np.zeros((nlabels_one, nlabels_two, 3), dtype=np.uint64)
    counter = np.zeros((nlabels_one, nlabels_two), dtype=np.uint64)
    FindCenters(segmentation_one, segmentation_two, forward_mapping_one,
                forward_mapping_two, sums, counter)

    # get the number of occurrences of all labels
    # NOTE(review): indexing these counts with the forward-mapped index
    # assumes ReduceLabels orders labels like np.unique does - confirm
    _, counts_one = np.unique(segmentation_one, return_counts=True)
    _, counts_two = np.unique(segmentation_two, return_counts=True)

    # iterate through candidates and locate centers
    axes = (IB_Z, IB_Y, IB_X)
    candidates = []
    centers = []
    counts = []
    for candidate in candidates_set:
        # skip extracellular space
        if not candidate[0] or not candidate[1]: continue

        # get forward mapping
        index_one = forward_mapping_one[candidate[0]]
        index_two = forward_mapping_two[candidate[1]]

        count = counter[index_one][index_two]
        center = tuple(
            int(sums[index_one, index_two, axis] / count + 0.5)
            for axis in axes)

        # append to the lists
        candidates.append(candidate)
        centers.append(center)
        counts.append((counts_one[index_one], counts_two[index_two], count))

    # prune the candidates
    indices = PruneCandidates(segmentation_one, segmentation_two, candidates,
                              centers, radii, overlap)
    pruned_candidates = []
    pruned_centers = []
    pruned_counts = []
    for index in indices:
        pruned_candidates.append(candidates[index])
        pruned_counts.append(counts[index])

        # shift the center by the mins of the intersection box
        pruned_centers.append(
            tuple(centers[index][axis] + world_box.mins[axis]
                  for axis in axes))

    # save all features
    SaveFeatures(prefix_one, prefix_two, pruned_candidates, pruned_centers,
                 pruned_counts, threshold, maximum_distance)