예제 #1
0
def tfrecord_matrix_id_loc_distinct(
        tfrecord_dir, size, channels):  # write shards dynamically when called
    """Write a TFRecord of random images, each tagged with a distinct id string.

    Per the original note, this mirrors tfrecord_matrix_id_loc but uses
    different id strings, so tests can check that input_utils actually loads
    several tfrecords.

    Args:
        tfrecord_dir: directory in which the tfrecord file is (re)created.
        size: forwarded to random_image for every example.
        channels: forwarded to random_image for every example.

    Returns:
        Path of the written tfrecord file.
    """
    tfrecord_loc = os.path.join(tfrecord_dir,
                                's28_matrix_float_0_128_256.tfrecord')
    # Start from a clean file so records from earlier runs cannot leak in.
    if os.path.exists(tfrecord_loc):
        os.remove(tfrecord_loc)

    # Monotonically increasing ids ('128' .. '1023', 896 examples), to check
    # shuffling and input read rates.
    # NOTE(review): the original comment describes this as "3x the size of
    # tfrecord_matrix_id_loc" - confirm against that fixture.
    examples = [
        {
            'matrix': random_image(size, channels),
            'id_str': str(n)
        } for n in range(128, 1024)
    ]

    # The context manager closes the writer even if serialization raises.
    with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
        for example in examples:  # depends on tfrecord.create_tfrecord
            writer.write(
                create_tfrecord.serialize_image_example(
                    matrix=example['matrix'], id_str=example['id_str']))

    return tfrecord_loc
예제 #2
0
def test_serialize_image_example_extra_data(visual_check_image_data,
                                            extra_data_feature_spec):
    """Check that extra keyword features survive a serialize/parse round trip.

    Serializes the image with a label plus an int, a float, a float array and
    a string, parses the result with extra_data_feature_spec, and asserts every
    recovered value matches what was passed in.
    """
    label = 1.
    an_int = 1
    a_float = .5
    some_floats = np.array([1., 2., 3.])
    a_string = 'hello world'
    # Built from the variables above so the serialized inputs and the asserted
    # expectations cannot drift apart.
    extra_kwargs = {
        'an_int': an_int,
        'a_float': a_float,
        'some_floats': some_floats,
        'a_string': a_string
    }
    serialized_example = create_tfrecord.serialize_image_example(
        visual_check_image_data, label=label, **extra_kwargs)
    # Tensors must be evaluated within a session to be comparable to numpy.
    with tf.Session():
        example = tf.parse_single_example(serialized_example,
                                          extra_data_feature_spec)
        recovered_matrix = tf.io.decode_raw(example['matrix'],
                                            out_type=tf.uint8).eval()
        assert np.allclose(recovered_matrix, visual_check_image_data.flatten())
        assert example['label'].eval() == label
        assert example['an_int'].eval() == an_int
        assert np.isclose(example['a_float'].eval(), a_float)
        assert np.allclose(example['some_floats'].eval(), some_floats)
        # The string comes back as bytes, hence the encode().
        assert example['a_string'].eval() == a_string.encode()
예제 #3
0
def example_tfrecords(tfrecord_train_loc, tfrecord_test_loc, example_data):
    """Write the same labelled examples to both the train and test tfrecords.

    Args:
        tfrecord_train_loc: path of the train tfrecord to (re)create.
        tfrecord_test_loc: path of the test tfrecord to (re)create.
        example_data: iterable of indexable items; item[0] is serialized as
            the matrix and item[1] as the label.
    """
    tfrecord_locs = [tfrecord_train_loc, tfrecord_test_loc]
    for tfrecord_loc in tfrecord_locs:
        # Start from a clean file so records from earlier runs cannot leak in.
        if os.path.exists(tfrecord_loc):
            os.remove(tfrecord_loc)

        # The context manager closes the writer even if serialization raises.
        with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
            for example in example_data:
                writer.write(
                    create_tfrecord.serialize_image_example(
                        matrix=example[0], label=example[1]))
예제 #4
0
def test_serialize_image_example(visual_check_image_data, size, channels):
    """Check that an image and float label survive a serialize/parse round trip."""
    serialized = create_tfrecord.serialize_image_example(
        visual_check_image_data, label=1.)
    # Tensors are only comparable to numpy values once evaluated in a session.
    with tf.Session() as sess:
        feature_spec = read_tfrecord.matrix_label_feature_spec(
            size, channels, float_label=True)
        parsed = tf.parse_single_example(serialized, features=feature_spec)

        matrix_tensor = tf.io.decode_raw(parsed['matrix'], out_type=tf.uint8)
        recovered_matrix = sess.run(matrix_tensor)
        expected_matrix = visual_check_image_data.flatten()
        assert np.allclose(recovered_matrix, expected_matrix)

        recovered_label = sess.run(parsed['label'])
        assert recovered_label == 1.
예제 #5
0
def tfrecord_matrix_loc(tfrecord_dir, size,
                        channels):  # write shards dynamically when called
    """Write a TFRecord of 128 random images with no label or id.

    Args:
        tfrecord_dir: directory in which the tfrecord file is (re)created.
        size: forwarded to random_image for every example.
        channels: forwarded to random_image for every example.

    Returns:
        Path of the written tfrecord file.
    """
    tfrecord_loc = os.path.join(tfrecord_dir, 's28_matrix_0.tfrecord')
    # Start from a clean file so records from earlier runs cannot leak in.
    if os.path.exists(tfrecord_loc):
        os.remove(tfrecord_loc)

    examples = [{'matrix': random_image(size, channels)} for _ in range(128)]

    # The context manager closes the writer even if serialization raises.
    with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
        for example in examples:  # depends on tfrecord.create_tfrecord
            writer.write(
                create_tfrecord.serialize_image_example(
                    matrix=example['matrix']))

    return tfrecord_loc
예제 #6
0
def row_to_serialized_example(row, img_size, columns_to_save, reader):
    """Load a row's image, resize and flip it, and serialize it with extra columns.

    Args:
        row: indexable by column name; should have columns that exactly match
            a read_tfrecord feature spec function.
        img_size: side length of the square image after resizing.
        columns_to_save: column names whose row values are serialized
            alongside the image.
        reader: callable taking the row and returning an image object with
            resize/transpose methods (presumably a PIL image).

    Returns:
        The result of create_tfrecord.serialize_image_example.
    """
    source_img = reader(row)

    # Vertical flip is to align with north/east.
    # TODO refactor this to make sure it matches downloader
    resized_img = source_img.resize(size=(img_size, img_size),
                                    resample=Image.LANCZOS)
    flipped_img = resized_img.transpose(Image.FLIP_TOP_BOTTOM)
    matrix = np.array(flipped_img)

    extra_kwargs = {col: row[col] for col in columns_to_save}
    return create_tfrecord.serialize_image_example(matrix, **extra_kwargs)
예제 #7
0
def stratified_tfrecord_locs(tfrecord_dir, stratified_data):
    """Write the same labelled examples to stratified train and test tfrecords.

    Args:
        tfrecord_dir: directory in which both tfrecord files are (re)created.
        stratified_data: iterable of indexable items; item[0] is serialized
            as the matrix and item[1] as the label.

    Returns:
        List of the two written paths, of form [train_loc, test_loc].
    """
    tfrecord_locs = [
        os.path.join(tfrecord_dir, 'stratified_train.tfrecords'),
        os.path.join(tfrecord_dir, 'stratified_test.tfrecords')
    ]

    for tfrecord_loc in tfrecord_locs:
        # Start from a clean file so records from earlier runs cannot leak in.
        if os.path.exists(tfrecord_loc):
            os.remove(tfrecord_loc)

        # The context manager closes the writer even if serialization raises.
        with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
            for example in stratified_data:  # depends on tfrecord.create_tfrecord
                writer.write(
                    create_tfrecord.serialize_image_example(
                        matrix=example[0], label=example[1]))

    return tfrecord_locs  # of form [train_loc, test_loc]
예제 #8
0
def tfrecord_matrix_ints_loc(tfrecord_dir, size,
                             channels):  # write shards dynamically when called
    """Write a TFRecord of 128 random images with monotonically rising labels.

    Despite the function name, the labels written here are floats: example n
    gets label n / 128., i.e. 0.0 up to 127/128.

    Args:
        tfrecord_dir: directory in which the tfrecord file is (re)created.
        size: forwarded to random_image for every example.
        channels: forwarded to random_image for every example.

    Returns:
        Path of the written tfrecord file.
    """
    tfrecord_loc = os.path.join(tfrecord_dir, 's28_matrix_float_0.tfrecord')
    # Start from a clean file so records from earlier runs cannot leak in.
    if os.path.exists(tfrecord_loc):
        os.remove(tfrecord_loc)

    # monotonic labels, to check shuffling
    examples = [{
        'matrix': random_image(size, channels),
        'label': (n / 128.)
    } for n in range(128)]

    # The context manager closes the writer even if serialization raises.
    with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
        for example in examples:  # depends on tfrecord.create_tfrecord
            writer.write(
                create_tfrecord.serialize_image_example(
                    matrix=example['matrix'], label=example['label']))

    return tfrecord_loc
예제 #9
0
def shard_locs(tfrecord_dir, size,
               channels):  # write shards dynamically when called
    """Write two tfrecord shards of 128 random images with per-shard id strings.

    Each example's id_str is '<shard index>_<example index>', so ids are
    unique across both shards.

    Args:
        tfrecord_dir: directory in which the shard files are (re)created.
        size: forwarded to random_image for every example.
        channels: forwarded to random_image for every example.

    Returns:
        List of the two written shard paths, of form [shard_0_loc, shard_1_loc].
    """
    shard_names = ['s28_shard_0.tfrecord', 's28_shard_1.tfrecord']
    tfrecord_locs = [os.path.join(tfrecord_dir, name) for name in shard_names]

    for tfrecord_n, tfrecord_loc in enumerate(tfrecord_locs):
        # Start from a clean file so records from earlier runs cannot leak in.
        if os.path.exists(tfrecord_loc):
            os.remove(tfrecord_loc)

        examples = [{
            'matrix': random_image(size, channels),
            'id_str': str(tfrecord_n) + '_' + str(n)
        } for n in range(128)]

        # The context manager closes the writer even if serialization raises.
        with tf.python_io.TFRecordWriter(tfrecord_loc) as writer:
            for example in examples:  # depends on tfrecord.create_tfrecord
                writer.write(
                    create_tfrecord.serialize_image_example(
                        matrix=example['matrix'], id_str=example['id_str']))

    return tfrecord_locs  # of form [shard_0_loc, shard_1_loc]
예제 #10
0
def serialized_matrix_label_id_example(size, channels, unique_id):
    """Serialize one random image with label 1. and the given id string."""
    matrix = random_image(size, channels)
    return create_tfrecord.serialize_image_example(
        matrix=matrix, label=1., id_str=unique_id)
예제 #11
0
def serialized_matrix_label_example(size, channels):
    """Serialize one random image with label 1."""
    matrix = random_image(size, channels)
    return create_tfrecord.serialize_image_example(matrix=matrix, label=1.)