Example #1
import itertools
import time

import tensorflow as tf

# load_glove, import_mscoco, ImageCaptioner, VisualSentinelCell, BATCH_SIZE,
# PRINT_STRING, list_of_ids_to_string, and get_visual_sentinel_checkpoint
# are assumed to be provided by the surrounding repository.
def main(unused_argv):

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, spatial_features, input_seq, target_seq, indicator = (
            import_mscoco(mode="train",
                          batch_size=BATCH_SIZE,
                          num_epochs=100,
                          is_mini=True))
        visual_sentinel_cell = VisualSentinelCell(300)
        image_captioner = ImageCaptioner(visual_sentinel_cell, vocab,
                                         pretrained_matrix)
        logits, ids = image_captioner(lengths=tf.reduce_sum(indicator, axis=1),
                                      spatial_image_features=spatial_features,
                                      seq_inputs=input_seq)
        tf.losses.sparse_softmax_cross_entropy(target_seq,
                                               logits,
                                               weights=indicator)
        loss = tf.losses.get_total_loss()
        learning_step = tf.train.GradientDescentOptimizer(1.0).minimize(
            loss, var_list=image_captioner.variables)

        captioner_saver = tf.train.Saver(var_list=image_captioner.variables)
        captioner_ckpt, captioner_ckpt_name = get_visual_sentinel_checkpoint()
        with tf.Session() as sess:
            sess.run(tf.variables_initializer(image_captioner.variables))
            if captioner_ckpt is not None:
                captioner_saver.restore(sess, captioner_ckpt)
            captioner_saver.save(sess, captioner_ckpt_name)
            last_save = time.time()
            for i in itertools.count():
                time_start = time.time()
                try:
                    _ids, _loss, _learning_step = sess.run(
                        [ids, loss, learning_step])
                except tf.errors.OutOfRangeError:
                    # the input pipeline is exhausted after num_epochs passes
                    break
                print(
                    PRINT_STRING.format(
                        i, _loss,
                        list_of_ids_to_string(_ids[0, :].tolist(), vocab),
                        BATCH_SIZE / (time.time() - time_start)))
                new_save = time.time()
                if new_save - last_save > 3600:  # save the model every hour
                    captioner_saver.save(sess, captioner_ckpt_name)
                    last_save = new_save

            captioner_saver.save(sess, captioner_ckpt_name)
            print("Finishing training.")
Example #2
BEAM_SIZE = 16

if __name__ == "__main__":

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, mean_features, input_seq, target_seq, indicator = (
            import_mscoco(mode="train",
                          batch_size=BATCH_SIZE,
                          num_epochs=1,
                          is_mini=True))
        image_captioner = ImageCaptioner(ShowAndTellCell(300),
                                         vocab,
                                         pretrained_matrix,
                                         trainable=False,
                                         beam_size=BEAM_SIZE)
        logits, ids = image_captioner(mean_image_features=mean_features)
        captioner_saver = tf.train.Saver(
            var_list=remap_decoder_name_scope(image_captioner.variables))
        captioner_ckpt, captioner_ckpt_name = get_show_and_tell_checkpoint()

        with tf.Session() as sess:

            assert captioner_ckpt is not None
            captioner_saver.restore(sess, captioner_ckpt)
            used_ids = set()
            json_dump = []

            for i in itertools.count():
Example #3
def main(unused_argv):

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, mean_features, object_features, input_seq, target_seq, indicator = import_mscoco(
            mode="train",
            batch_size=FLAGS.batch_size,
            num_epochs=FLAGS.num_epochs,
            is_mini=FLAGS.is_mini)
        up_down_cell = UpDownCell(300)
        image_captioner = ImageCaptioner(up_down_cell, vocab,
                                         pretrained_matrix)
        logits, ids = image_captioner(lengths=tf.reduce_sum(indicator, axis=1),
                                      mean_image_features=mean_features,
                                      mean_object_features=object_features,
                                      seq_inputs=input_seq)
        tf.losses.sparse_softmax_cross_entropy(target_seq,
                                               logits,
                                               weights=indicator)
        loss = tf.losses.get_total_loss()

        global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer()
        learning_step = optimizer.minimize(loss,
                                           var_list=image_captioner.variables,
                                           global_step=global_step)

        # note: this saver covers the model variables and global_step but not
        # the Adam slot variables, which are freshly initialized on restore
        captioner_saver = tf.train.Saver(var_list=image_captioner.variables +
                                         [global_step])
        captioner_ckpt, captioner_ckpt_name = get_up_down_checkpoint()
        with tf.Session() as sess:

            sess.run(tf.variables_initializer(optimizer.variables()))
            if captioner_ckpt is not None:
                captioner_saver.restore(sess, captioner_ckpt)
            else:
                sess.run(
                    tf.variables_initializer(image_captioner.variables +
                                             [global_step]))
            captioner_saver.save(sess,
                                 captioner_ckpt_name,
                                 global_step=global_step)
            last_save = time.time()

            for i in itertools.count():

                time_start = time.time()
                try:
                    _target, _ids, _loss, _learning_step = sess.run(
                        [target_seq, ids, loss, learning_step])
                except tf.errors.OutOfRangeError:
                    # the input pipeline is exhausted after num_epochs passes
                    break

                iteration = sess.run(global_step)

                print(
                    PRINT_STRING.format(
                        iteration, _loss,
                        list_of_ids_to_string(_ids[0, :].tolist(), vocab),
                        list_of_ids_to_string(_target[0, :].tolist(), vocab),
                        FLAGS.batch_size / (time.time() - time_start)))

                new_save = time.time()
                if new_save - last_save > 3600:  # save the model every hour
                    captioner_saver.save(sess,
                                         captioner_ckpt_name,
                                         global_step=global_step)
                    last_save = new_save

            captioner_saver.save(sess,
                                 captioner_ckpt_name,
                                 global_step=global_step)
            print("Finishing training.")
Example #4
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, spatial_features, input_seq, target_seq, indicator = import_mscoco(
            mode=FLAGS.mode,
            batch_size=FLAGS.batch_size,
            num_epochs=1,
            is_mini=FLAGS.is_mini)
        image_captioner = ImageCaptioner(ShowAttendAndTellCell(300),
                                         vocab,
                                         pretrained_matrix,
                                         trainable=False,
                                         beam_size=FLAGS.beam_size)
        logits, ids = image_captioner(spatial_image_features=spatial_features)
        captioner_saver = tf.train.Saver(
            var_list=remap_decoder_name_scope(image_captioner.variables))
        captioner_ckpt, captioner_ckpt_name = get_show_attend_and_tell_checkpoint()

        with tf.Session() as sess:

            assert captioner_ckpt is not None
            captioner_saver.restore(sess, captioner_ckpt)
            used_ids = set()
            json_dump = []
Example #5
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, spatial_features, input_seq, target_seq, indicator = import_mscoco(
            mode=FLAGS.mode,
            batch_size=FLAGS.batch_size,
            num_epochs=1,
            is_mini=FLAGS.is_mini)
        image_captioner = ImageCaptioner(SpatialAttentionCell(300),
                                         vocab,
                                         pretrained_matrix,
                                         trainable=False,
                                         beam_size=FLAGS.beam_size)
        logits, ids = image_captioner(spatial_image_features=spatial_features)
        captioner_saver = tf.train.Saver(
            var_list=remap_decoder_name_scope(image_captioner.variables))
        captioner_ckpt, captioner_ckpt_name = get_spatial_attention_checkpoint()

        with tf.Session() as sess:

            assert captioner_ckpt is not None
            captioner_saver.restore(sess, captioner_ckpt)
            used_ids = set()
            json_dump = []
Example #6
def main(unused_argv):

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, mean_features, input_seq, target_seq, indicator = (
            import_mscoco(mode="train",
                          batch_size=BATCH_SIZE,
                          num_epochs=100,
                          is_mini=True))
        show_and_tell_cell = ShowAndTellCell(300)
        image_captioner = ImageCaptioner(show_and_tell_cell, vocab,
                                         pretrained_matrix)
        logits, ids = image_captioner(lengths=tf.reduce_sum(indicator, axis=1),
                                      mean_image_features=mean_features,
                                      seq_inputs=input_seq)
        tf.losses.sparse_softmax_cross_entropy(target_seq,
                                               logits,
                                               weights=indicator)
        loss = tf.losses.get_total_loss()

        global_step = tf.train.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(
            INITIAL_LEARNING_RATE,
            global_step, (TRAINING_EXAMPLES // BATCH_SIZE) * EPOCHS_PER_DECAY,
            DECAY_RATE,
            staircase=True)
        learning_step = tf.train.GradientDescentOptimizer(
            learning_rate).minimize(loss,
                                    var_list=image_captioner.variables,
                                    global_step=global_step)

        captioner_saver = tf.train.Saver(var_list=image_captioner.variables +
                                         [global_step])
        captioner_ckpt, captioner_ckpt_name = get_show_and_tell_checkpoint()
        with tf.Session() as sess:

            if captioner_ckpt is not None:
                captioner_saver.restore(sess, captioner_ckpt)
            else:
                sess.run(
                    tf.variables_initializer(image_captioner.variables +
                                             [global_step]))
            captioner_saver.save(sess,
                                 captioner_ckpt_name,
                                 global_step=global_step)
            last_save = time.time()

            for i in itertools.count():

                time_start = time.time()
                try:
                    _ids, _loss, _learning_step = sess.run(
                        [ids, loss, learning_step])
                except tf.errors.OutOfRangeError:
                    # the input pipeline is exhausted after num_epochs passes
                    break

                iteration = sess.run(global_step)

                print(
                    PRINT_STRING.format(
                        iteration, _loss,
                        list_of_ids_to_string(_ids[0, :].tolist(), vocab),
                        BATCH_SIZE / (time.time() - time_start)))

                new_save = time.time()
                if new_save - last_save > 3600:  # save the model every hour
                    captioner_saver.save(sess,
                                         captioner_ckpt_name,
                                         global_step=global_step)
                    last_save = new_save

            captioner_saver.save(sess,
                                 captioner_ckpt_name,
                                 global_step=global_step)
            print("Finishing training.")
Example #7
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, spatial_features, input_seq, target_seq, indicator = import_mscoco(
            mode=FLAGS.mode,
            batch_size=FLAGS.batch_size,
            num_epochs=1,
            is_mini=FLAGS.is_mini)
        image_captioner = ImageCaptioner(VisualSentinelCell(300),
                                         vocab,
                                         pretrained_matrix,
                                         trainable=False,
                                         beam_size=FLAGS.beam_size)
        logits, ids = image_captioner(spatial_image_features=spatial_features)
        captioner_saver = tf.train.Saver(
            var_list=remap_decoder_name_scope(image_captioner.variables))
        captioner_ckpt, captioner_ckpt_name = get_visual_sentinel_checkpoint()

        with tf.Session() as sess:

            assert captioner_ckpt is not None
            captioner_saver.restore(sess, captioner_ckpt)
            used_ids = set()
            json_dump = []

            for i in itertools.count():
Example #8
FLAGS = tf.flags.FLAGS

if __name__ == "__main__":

    vocab, pretrained_matrix = load_glove(vocab_size=100000,
                                          embedding_size=300)
    with tf.Graph().as_default():

        image_id, mean_features, object_features, input_seq, target_seq, indicator = import_mscoco(
            mode=FLAGS.mode,
            batch_size=FLAGS.batch_size,
            num_epochs=1,
            is_mini=FLAGS.is_mini)
        image_captioner = ImageCaptioner(UpDownCell(300),
                                         vocab,
                                         pretrained_matrix,
                                         trainable=False,
                                         beam_size=FLAGS.beam_size)
        logits, ids = image_captioner(mean_image_features=mean_features,
                                      mean_object_features=object_features)
        captioner_saver = tf.train.Saver(
            var_list=remap_decoder_name_scope(image_captioner.variables))
        captioner_ckpt, captioner_ckpt_name = get_up_down_checkpoint()

        with tf.Session() as sess:

            assert captioner_ckpt is not None
            captioner_saver.restore(sess, captioner_ckpt)
            used_ids = set()
            json_dump = []
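
Every evaluation snippet above is cut off right after json_dump is created. Purely as a hypothetical sketch of a loop body consistent with the variables these snippets set up (the output fields are guesses, list_of_ids_to_string is carried over from the training snippets, and under beam search ids may carry an extra beam dimension):

            for i in itertools.count():
                try:
                    _image_id, _ids = sess.run([image_id, ids])
                except tf.errors.OutOfRangeError:
                    break
                for single_id, caption_ids in zip(_image_id.tolist(),
                                                  _ids.tolist()):
                    # emit one caption per image, skipping repeated ids
                    if single_id in used_ids:
                        continue
                    used_ids.add(single_id)
                    json_dump.append({
                        "image_id": single_id,
                        "caption": list_of_ids_to_string(caption_ids, vocab)})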