Exemplos de WriteH5File em Python, exemplos de biologicalgraphs.utilities.dataIO.WriteH5File em Python

Exemplo n.º 1

0

Exibir arquivo

def GenerateEdges(prefix, segmentation, subset, seg2gold_mapping=None):
    """Generate candidate edges for a segmentation and save them as features.

    Edges come from EndpointTraversal(prefix, segmentation, maximum_distance).
    Each edge is sorted into one of six lists: positives, negatives or
    unknowns, either for the requested subset (edge location inside the box
    returned by dataIO.CroppingBox) or for 'forward' inference (location
    outside that box). Every non-empty list is written to disk as a binary
    .examples file and as an h5 file of generated example arrays.

    Args:
        prefix: dataset identifier; forwarded to the helpers and used in the
            output filenames.
        segmentation: label volume forwarded to EndpointTraversal and
            GenerateExamplesArray.
        subset: one of 'training', 'validation' or 'testing'.
        seg2gold_mapping: optional lookup indexed by segment label. Two
            segments whose mapped values are equal and >= 1 form a positive
            edge; a mapped value < 1 makes the edge unknown. When None, every
            edge is unknown.
    """
    # parameters from CVPR submission
    network_radius = 600
    maximum_distance = 500
    width = (18, 52, 52)

    # create the directory structure to save the features in
    # forward is needed for training and validation data that is cropped
    CreateDirectoryStructure(width, network_radius,
                             ['training', 'validation', 'testing', 'forward'],
                             'edges')

    # make sure the subset is one of three categories
    assert subset in ('training', 'validation', 'testing')

    # crop the subset if it overlaps with testing data
    ((cropped_zmin, cropped_zmax), (cropped_ymin, cropped_ymax),
     (cropped_xmin, cropped_xmax)) = dataIO.CroppingBox(prefix)

    # call the function to actually generate the edges
    edges = EndpointTraversal(prefix, segmentation, maximum_distance)

    # one list per category, kept separately for cropped and forward edges
    categories = ('positives', 'negatives', 'unknowns')
    cropped_examples = dict((category, []) for category in categories)
    forward_examples = dict((category, []) for category in categories)

    for edge in edges:
        zpoint, ypoint, xpoint = (edge[IB_Z], edge[IB_Y], edge[IB_X])
        label_one, label_two = edge[3], edge[4]

        # if the center of the point falls outside the cropped box do not
        # include it in training or validation; it is still allowed for
        # forward inference
        forward = (zpoint < cropped_zmin or cropped_zmax <= zpoint
                   or ypoint < cropped_ymin or cropped_ymax <= ypoint
                   or xpoint < cropped_xmin or cropped_xmax <= xpoint)

        # see if these two segments belong to the same neuron
        if seg2gold_mapping is not None:
            gold_one = seg2gold_mapping[label_one]
            gold_two = seg2gold_mapping[label_two]
        else:
            gold_one = -1
            gold_two = -1

        if gold_one < 1 or gold_two < 1:
            category = 'unknowns'
        elif gold_one == gold_two:
            category = 'positives'
        else:
            category = 'negatives'

        if forward:
            forward_examples[category].append(edge)
        else:
            cropped_examples[category].append(edge)

    # only the edges inside the cropped box are reported
    print('No. Positive Edges: {}'.format(len(cropped_examples['positives'])))
    print('No. Negative Edges: {}'.format(len(cropped_examples['negatives'])))
    print('No. Unknown Edges: {}'.format(len(cropped_examples['unknowns'])))

    parent_directory = 'features/biological/edges-{}nm-{}x{}x{}'.format(
        network_radius, width[IB_Z], width[IB_Y], width[IB_X])

    # save the subset examples first, then the forward examples
    outputs = (
        ('{}/{}'.format(parent_directory, subset), cropped_examples),
        ('{}/forward'.format(parent_directory), forward_examples),
    )
    for directory, examples_by_category in outputs:
        for category in categories:
            _SaveEdgeExamples(examples_by_category[category],
                              '{}/{}'.format(directory, category), prefix,
                              segmentation, width, network_radius)


def _SaveEdgeExamples(edge_examples, directory, prefix, segmentation, width,
                      network_radius):
    """Write one category of edges to '<directory>/<prefix>.examples' and
    '<directory>/<prefix>-examples.h5'; does nothing for an empty list."""
    if not edge_examples:
        return

    # save the examples: a count followed by six signed 64-bit values each
    with open('{}/{}.examples'.format(directory, prefix), 'wb') as fd:
        fd.write(struct.pack('q', len(edge_examples)))
        for example in edge_examples:
            fd.write(
                struct.pack('qqqqqq', example[0], example[1], example[2],
                            example[3], example[4], example[5]))

    # create new examples array to remove last element
    examples = [example[0:5] for example in edge_examples]

    # the array is local to this helper, so it is released on return
    examples_array = GenerateExamplesArray(prefix, segmentation, examples,
                                           width, network_radius)
    dataIO.WriteH5File(examples_array,
                       '{}/{}-examples.h5'.format(directory, prefix),
                       'main',
                       compression=True)

Exemplo n.º 2

0

Exibir arquivo

def CollapseGraph(prefix, segmentation, vertex_ones, vertex_twos,
                  maintained_edges, algorithm, evaluate):
    """Merge the segments joined by non-maintained edges and save the result.

    Every edge ie with a falsy maintained_edges[ie] unions vertex_ones[ie]
    and vertex_twos[ie]. The resulting label mapping is passed to
    seg2seg.MapLabels, the segmentation is written to
    'segmentations/<prefix>-<algorithm>.h5' and a meta file is spawned for it.

    Args:
        prefix: dataset identifier used for filenames and gold data lookup.
        segmentation: label volume; its maximum value sizes the union find.
        vertex_ones: per-edge first label, indexed like maintained_edges.
        vertex_twos: per-edge second label, indexed like maintained_edges.
        maintained_edges: array whose shape[0] is the number of edges; a
            truthy entry means the edge is kept (not collapsed).
        algorithm: name embedded in the output filenames.
        evaluate: when truthy, compare against the gold data, print the rand
            error and variation of information, and write them to
            'results/<prefix>-<algorithm>.txt'.
    """
    # get the number of edges
    nedges = maintained_edges.shape[0]

    # create the union find data structure and collapse the graph
    max_label = np.amax(segmentation) + 1
    union_find = [unionfind.UnionFindElement(iv) for iv in range(max_label)]

    # go through all of the edges
    for ie in range(nedges):
        # skip if the edge should not collapse
        if maintained_edges[ie]:
            continue

        # merge these vertices
        unionfind.Union(union_find[vertex_ones[ie]],
                        union_find[vertex_twos[ie]])

    # create the mapping from each label to its union find representative
    mapping = np.zeros(max_label, dtype=np.int64)
    for iv in range(max_label):
        mapping[iv] = unionfind.Find(union_find[iv]).label

    # apply the mapping and save the result
    # NOTE(review): the return value of MapLabels is discarded, so this relies
    # on MapLabels relabeling `segmentation` in place - confirm
    seg2seg.MapLabels(segmentation, mapping)

    segmentation_filename = 'segmentations/{}-{}.h5'.format(prefix, algorithm)
    dataIO.WriteH5File(segmentation, segmentation_filename, 'main')

    # spawn a new meta file
    dataIO.SpawnMetaFile(prefix, segmentation_filename, 'main')

    # evaluate only if gold data exists
    if not evaluate:
        return

    # strip the directory and the '.h5' suffix from the filename
    new_prefix = segmentation_filename.split('/')[1][:-3]

    # read in the new gold data
    gold = dataIO.ReadGoldData(prefix)

    rand_error, vi = comparestacks.VariationOfInformation(
        new_prefix, segmentation, gold)

    # build the report once; it is both printed and written to disk
    report_lines = [
        'Rand Error Full: {}'.format(rand_error[0] + rand_error[1]),
        'Rand Error Merge: {}'.format(rand_error[0]),
        'Rand Error Split: {}'.format(rand_error[1]),
        'Variation of Information Full: {}'.format(vi[0] + vi[1]),
        'Variation of Information Merge: {}'.format(vi[0]),
        'Variation of Information Split: {}'.format(vi[1]),
    ]

    for line in report_lines:
        print(line)

    if not os.path.exists('results'):
        os.mkdir('results')
    with open('results/{}-{}.txt'.format(prefix, algorithm), 'w') as fd:
        for line in report_lines:
            fd.write(line + '\n')

Exemplo n.º 3

0

Exibir arquivo

def MergeGroundTruth(prefix, model_prefix):
    """Merge every graph edge whose two segments share a non-zero gold label.

    Reads the edges from 'multicut/<model_prefix>-<prefix>.graph', unions the
    two labels of each edge when seg2gold.Mapping assigns them the same
    truthy value, writes the relabeled segmentation to
    'multicut/<prefix>-truth.h5' and runs the external comparestacks binary
    against 'gold/<prefix>_gold.h5', printing the elapsed wall-clock time.

    Args:
        prefix: dataset identifier used to read data and build filenames.
        model_prefix: model identifier embedded in the graph filename.
    """
    # load the segmentation, locate the graph file and load the gold data
    segmentation = dataIO.ReadSegmentationData(prefix)
    graph_filename = 'multicut/{}-{}.graph'.format(model_prefix, prefix)
    gold = dataIO.ReadGoldData(prefix)

    # lookup from segment label to gold label
    seg2gold_labels = seg2gold.Mapping(segmentation, gold)

    # one union find element per possible segment label
    label_count = np.amax(segmentation) + 1
    elements = [
        UnionFind.UnionFindElement(label) for label in range(label_count)
    ]

    with open(graph_filename, 'rb') as fd:
        # header: number of vertices (unused) and number of edges
        _, edge_count = struct.unpack('QQ', fd.read(16))

        for _ in range(edge_count):
            # each record starts with the two segment labels
            first, second = struct.unpack('QQ', fd.read(16))

            # skip over the reduced labels and edge weight
            fd.read(24)

            # merge only when both gold labels agree and are non zero
            same_gold = seg2gold_labels[first] == seg2gold_labels[second]
            if same_gold and seg2gold_labels[first]:
                UnionFind.Union(elements[first], elements[second])

    # map every label onto the label of its union find representative
    relabeling = np.zeros(label_count, dtype=np.int64)
    for index, element in enumerate(elements):
        relabeling[index] = UnionFind.Find(element).label

    merged_segmentation = seg2seg.MapLabels(segmentation, relabeling)

    gold_filename = 'gold/{}_gold.h5'.format(prefix)

    # TODO fix this code temporary filename
    truth_filename = 'multicut/{}-truth.h5'.format(prefix)

    # temporary write h5 file
    dataIO.WriteH5File(merged_segmentation, truth_filename, 'stack')

    import time
    started = time.time()
    print('Ground truth: ')

    # create the command line and hand it to the shell
    command = (
        '~/software/PixelPred2Seg/comparestacks --stack1 {} --stackbase {} '
        '--dilate1 1 --dilatebase 1 --relabel1 --relabelbase '
        '--filtersize 100 --anisotropic').format(truth_filename,
                                                 gold_filename)
    os.system(command)

    elapsed = time.time() - started
    print(elapsed)

Exemplo n.º 4

0

Exibir arquivo

def Agglomerate(prefix, model_prefix, threshold=0.5):
    """Merge every graph edge whose weight exceeds the threshold.

    Reads the edges from 'multicut/<model_prefix>-<prefix>.graph', unions the
    two labels of each edge with weight > threshold, writes the relabeled
    segmentation to 'multicut/<prefix>-agglomerate.h5' and runs the external
    comparestacks binary against 'gold/<prefix>_gold.h5', printing the
    elapsed wall-clock time.

    Args:
        prefix: dataset identifier used to read data and build filenames.
        model_prefix: model identifier embedded in the graph filename.
        threshold: edges with a strictly larger weight are merged.
    """
    # load the segmentation and locate the graph file (with graph weights)
    segmentation = dataIO.ReadSegmentationData(prefix)
    graph_filename = 'multicut/{}-{}.graph'.format(model_prefix, prefix)

    # one union find element per possible segment label
    label_count = np.amax(segmentation) + 1
    elements = [
        UnionFind.UnionFindElement(label) for label in range(label_count)
    ]

    with open(graph_filename, 'rb') as fd:
        # header: number of vertices (unused) and number of edges
        _, edge_count = struct.unpack('QQ', fd.read(16))

        for _ in range(edge_count):
            # each record starts with the two segment labels
            first, second = struct.unpack('QQ', fd.read(16))

            # skip over the reduced labels
            fd.read(16)

            # the record ends with the edge weight as a double
            weight = struct.unpack('d', fd.read(8))[0]

            # merge the two labels when the weight is above the threshold
            if weight > threshold:
                UnionFind.Union(elements[first], elements[second])

    # map every label onto the label of its union find representative
    relabeling = np.zeros(label_count, dtype=np.int64)
    for index, element in enumerate(elements):
        relabeling[index] = UnionFind.Find(element).label

    # update the labels
    agglomerated_segmentation = seg2seg.MapLabels(segmentation, relabeling)

    gold_filename = 'gold/{}_gold.h5'.format(prefix)

    # TODO fix this code temporary filename
    agglomeration_filename = 'multicut/{}-agglomerate.h5'.format(prefix)

    # temporary - write h5 file
    dataIO.WriteH5File(agglomerated_segmentation, agglomeration_filename,
                       'stack')

    import time
    started = time.time()
    print('Agglomeration - {}:'.format(threshold))

    # create the command line and hand it to the shell
    command = (
        '~/software/PixelPred2Seg/comparestacks --stack1 {} --stackbase {} '
        '--dilate1 1 --dilatebase 1 --relabel1 --relabelbase '
        '--filtersize 100 --anisotropic').format(agglomeration_filename,
                                                 gold_filename)
    os.system(command)

    elapsed = time.time() - started
    print(elapsed)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: forward.py Projeto: ngchc/biologicalgraphs

def Forward(prefix,
            model_prefix,
            segmentation,
            subset,
            seg2gold_mapping=None,
            evaluate=False):
    """Run the trained model and merge small segments into large neighbors.

    Loads the model from '<model_prefix>.json' and
    '<model_prefix>-best-loss.h5', predicts a probability for every collected
    example, and maps each small segment onto its most probable large
    partner when that probability is at least 0.5. The relabeled
    segmentation is then reduced, written to
    'segmentations/<prefix>-reduced-<model_name>.h5', and the combined label
    mapping is cached in 'cache/<prefix>-reduced-<model_name>-end2end.map'.

    Args:
        prefix: dataset identifier used to read data and build filenames.
        model_prefix: path prefix of the trained model; its second
            '/'-separated component is used as the model name.
        segmentation: label volume, passed to seg2seg.MapLabels twice.
            NOTE(review): the MapLabels return value is discarded, so this
            appears to rely on in-place relabeling - confirm.
        subset: forwarded to CollectExamples and CollectLargeSmallPairs.
        seg2gold_mapping: optional lookup indexed by segment label; when
            given, precision/recall and merge correctness are reported.
        evaluate: when truthy, compute rand error and variation of
            information against the gold data and write them to 'results/'.
    """
    # optimal parameters from paper (width starts with 3 channels)
    width = (3, 20, 60, 60)
    radius = 400
    threshold_volume = 10368000

    # read in the trained model (close the json file once it is read)
    with open('{}.json'.format(model_prefix), 'r') as fd:
        model = model_from_json(fd.read())
    model.load_weights('{}-best-loss.h5'.format(model_prefix))

    # get all of the examples
    examples, npositives, nnegatives = CollectExamples(prefix, width, radius,
                                                       subset)

    # get all of the large-small pairings
    pairings = CollectLargeSmallPairs(prefix, width, radius, subset)
    #assert (len(pairings) == examples.shape[0])

    # get the threshold in terms of number of voxels
    resolution = dataIO.Resolution(prefix)
    threshold = int(threshold_volume /
                    (resolution[IB_Z] * resolution[IB_Y] * resolution[IB_X]))

    # get the list of nodes over and under the threshold
    small_segments, large_segments = FindSmallSegments(segmentation, threshold)

    # get all of the probabilities
    probabilities = model.predict_generator(NodeGenerator(examples, width),
                                            examples.shape[0],
                                            max_q_size=1000)

    # save the probabilities to a file
    # NOTE(review): the header stores examples.shape[0] while one record is
    # written per pairing; the assertion that the two agree is commented out
    probabilities_filename = '{}-{}.probabilities'.format(model_prefix, prefix)
    with open(probabilities_filename, 'wb') as fd:
        fd.write(struct.pack('q', examples.shape[0]))
        for ie, (label_one, label_two) in enumerate(pairings):
            fd.write(
                struct.pack('qqd', label_one, label_two, probabilities[ie]))

    # create the correct labels for the ground truth: the first npositives
    # entries are True and the remaining nnegatives entries are False
    # (np.bool was only an alias of the builtin bool and is gone in new NumPy)
    ground_truth = np.zeros(npositives + nnegatives, dtype=bool)
    ground_truth[:npositives] = True

    # get the results with labeled data
    predictions = Prob2Pred(np.squeeze(probabilities[:npositives +
                                                     nnegatives]))

    # print the confusion matrix
    if seg2gold_mapping is not None:
        inference_filename = '{}-{}-inference.txt'.format(model_prefix, prefix)
        PrecisionAndRecall(ground_truth, predictions, inference_filename)

    # create a mapping from each small segment to its candidate partners
    small_segment_predictions = dict(
        (small_segment, set()) for small_segment in small_segments)

    # go through each pairing
    for pairing, probability in zip(pairings, probabilities):
        label_one, label_two = pairing
        one_is_small = label_one in small_segments

        # make sure that either label one or two is small and the other is large
        assert (one_is_small ^ (label_two in small_segments))

        if one_is_small:
            small_segment = label_one
            large_segment = label_two
        else:
            small_segment = label_two
            large_segment = label_one

        small_segment_predictions[small_segment].add(
            (large_segment, probability[0]))

    # begin to map the small labels; every label starts mapped to itself
    max_label = np.amax(segmentation) + 1
    mapping = list(range(max_label))

    if seg2gold_mapping is not None:
        ncorrect_merges = 0
        nincorrect_merges = 0

    # go through all of the small segments
    for small_segment in small_segments:
        best_probability = -1
        best_large_segment = -1

        # go through all the neighboring large segments
        for large_segment, probability in small_segment_predictions[
                small_segment]:
            if probability > best_probability:
                best_probability = probability
                best_large_segment = large_segment

        # this should almost never happen but if it does just continue
        if best_large_segment == -1 or best_probability < 0.5:
            mapping[small_segment] = small_segment
            continue

        # merge the small segment into its best large segment
        mapping[small_segment] = best_large_segment

        if seg2gold_mapping is not None:
            # don't consider undetermined locations
            if (seg2gold_mapping[small_segment] < 1
                    or seg2gold_mapping[best_large_segment] < 1):
                continue

            if (seg2gold_mapping[small_segment] ==
                    seg2gold_mapping[best_large_segment]):
                ncorrect_merges += 1
            else:
                nincorrect_merges += 1

    if seg2gold_mapping is not None:
        print('\nResults:')
        print('  Correctly Merged: {}'.format(ncorrect_merges))
        print('  Incorrectly Merged: {}'.format(nincorrect_merges))

        # append to the inference file written by PrecisionAndRecall above
        with open(inference_filename, 'a') as fd:
            fd.write('\nResults:\n')
            fd.write('  Correctly Merged: {}\n'.format(ncorrect_merges))
            fd.write('  Incorrectly Merged: {}\n'.format(nincorrect_merges))

    # save the node mapping in the cache for later
    end2end_mapping = list(mapping)

    # initiate the mapping to eliminate small segments
    seg2seg.MapLabels(segmentation, mapping)

    # reduce the labels and map again
    mapping, _ = seg2seg.ReduceLabels(segmentation)
    seg2seg.MapLabels(segmentation, mapping)

    # update the end to end mapping with the reduced labels
    end2end_mapping = [mapping[label] for label in end2end_mapping]

    # get the model name (first component is architecture and third is node-)
    model_name = model_prefix.split('/')[1]
    segmentation_filename = 'segmentations/{}-reduced-{}.h5'.format(
        prefix, model_name)
    dataIO.WriteH5File(segmentation, segmentation_filename, 'main')

    # spawn a new meta file
    dataIO.SpawnMetaFile(prefix, segmentation_filename, 'main')

    # save the end to end mapping in the cache
    if not os.path.exists('cache'):
        os.mkdir('cache')
    mapping_filename = 'cache/{}-reduced-{}-end2end.map'.format(
        prefix, model_name)
    with open(mapping_filename, 'wb') as fd:
        fd.write(struct.pack('q', max_label))
        for label in range(max_label):
            fd.write(struct.pack('q', end2end_mapping[label]))

    if evaluate:
        gold = dataIO.ReadGoldData(prefix)

        # filename without its directory and extension
        new_prefix = segmentation_filename.split('/')[-1].split('.')[0]

        # run the evaluation framework
        rand_error, vi = comparestacks.VariationOfInformation(
            new_prefix, segmentation, gold)

        # write the output file
        if not os.path.exists('results'):
            os.mkdir('results')
        with open('results/{}-reduced-{}.txt'.format(prefix, model_name),
                  'w') as fd:
            fd.write('Rand Error Full: {}\n'.format(rand_error[0] +
                                                    rand_error[1]))
            fd.write('Rand Error Merge: {}\n'.format(rand_error[0]))
            fd.write('Rand Error Split: {}\n'.format(rand_error[1]))

            fd.write('Variation of Information Full: {}\n'.format(vi[0] +
                                                                  vi[1]))
            fd.write('Variation of Information Merge: {}\n'.format(vi[0]))
            fd.write('Variation of Information Split: {}\n'.format(vi[1]))