Example No. 1
def train_advanced_model(weight=None, batch_size=32, epochs=10):

    cg = caption_generator.CaptionGenerator()
    model = cg.create_advanced_model()

    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint]
    model.fit_generator(cg.data_generator(batch_size=batch_size),
                        steps_per_epoch=cg.total_samples / batch_size,
                        epochs=epochs,
                        verbose=2,
                        callbacks=callbacks_list)
    try:
        model.save('Models/WholeModel.h5', overwrite=True)
        model.save_weights('Models/Weights.h5', overwrite=True)
    except:
        print "Error in saving model."
    print "Training complete...\n"
Example No. 2
def train_model(weight=None, batch_size=32, epochs=10):

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()

    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = 'weights-improvement-epoch-{epoch:02d}-val_acc-{val_acc:.5f}-val_loss-{val_loss:.5f}.hdf5'
    checkpoint = ModelCheckpoint(file_name,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    callbacks_list = [checkpoint]
    model.fit_generator(cg.data_generator(
        path='Flickr8k_text/flickr_8k_train_dataset.txt',
        batch_size=batch_size),
                        steps_per_epoch=cg.total_samples / batch_size,
                        epochs=epochs,
                        verbose=2,
                        callbacks=callbacks_list,
                        validation_steps=cg.total_samples_dev / batch_size,
                        validation_data=cg.data_generator(
                            path='Flickr8k_text/flickr_8k_dev_dataset.txt',
                            batch_size=batch_size))
    try:
        model.save('Models/WholeModel.h5', overwrite=True)
        model.save_weights('Models/Weights.h5', overwrite=True)
    except:
        print "Error in saving model."
    print "Training complete...\n"
Example No. 3
def train_model(weight=None, batch_size=32, epochs=25):

    # Total samples : 64146

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()

    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint]

    model.fit_generator(cg.data_generator(batch_size=batch_size),
                        steps_per_epoch=300,
                        epochs=epochs,
                        verbose=2,
                        callbacks=callbacks_list)
    try:
        model.save(
            'C:/Users/pc/Desktop/automatic_image_caption_generation/Models/WholeModel.h5',
            overwrite=True)
        model.save_weights(
            'C:/Users/pc/Desktop/automatic_image_caption_generation/Models/Weights.h5',
            overwrite=True)
    except:
        print "Error in saving model."
    print "Training complete...\n"
Example No. 4
    def _assertExpectedCaptions(self,
                                expected_captions,
                                beam_size=3,
                                max_caption_length=20,
                                length_normalization_factor=0):
        """Tests that beam search generates the expected captions.

    Args:
      expected_captions: A sequence of pairs (sentence, probability), where
        sentence is a list of integer ids and probability is a float in [0, 1].
      beam_size: Parameter passed to beam_search().
      max_caption_length: Parameter passed to beam_search().
      length_normalization_factor: Parameter passed to beam_search().
    """
        expected_sentences = [c[0] for c in expected_captions]
        expected_probabilities = [c[1] for c in expected_captions]

        # Generate captions.
        generator = caption_generator.CaptionGenerator(
            model=FakeModel(),
            vocab=FakeVocab(),
            beam_size=beam_size,
            max_caption_length=max_caption_length,
            length_normalization_factor=length_normalization_factor)
        actual_captions = generator.beam_search(sess=None, encoded_image=None)

        actual_sentences = [c.sentence for c in actual_captions]
        actual_probabilities = [math.exp(c.logprob) for c in actual_captions]

        self.assertEqual(expected_sentences, actual_sentences)
        self.assertAllClose(expected_probabilities, actual_probabilities)
Example No. 5
def train_model(weight=None, batch_size=32, epochs=10):
    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()

    if weight is not None:
        model.load_weights(weight)

    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint]
    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)

    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])
    parallel_model.fit_generator(cg.data_generator(batch_size=batch_size),
                                 steps_per_epoch=cg.total_samples / batch_size,
                                 epochs=epochs,
                                 verbose=2,
                                 callbacks=callbacks_list)
    try:
        parallel_model.save(os.path.join(CUR_DIR, '../Models/WholeModel.h5'),
                            overwrite=True)
        parallel_model.save_weights(os.path.join(CUR_DIR,
                                                 '../Models/Weights.h5'),
                                    overwrite=True)
    except:
        print("Error in saving model.")
    print("Training complete...\n")
Example No. 6
def train_model(weight=None, batch_size=32, epochs=10):

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()

    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = DATA_PATH + 'weights-checkpoint.hdf5'

    #define callbacks
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1, save_best_only=True, mode='min')
    tensor_board = TensorBoard(log_dir='./logs', write_graph=False, write_images=False)
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=8, verbose=1, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='loss', min_delta=0.01, patience=16, verbose=1)

    callbacks_list = [checkpoint, tensor_board, reduce_lr, early_stopping]
    hist = model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=epochs, verbose=2, callbacks=callbacks_list)

    try:
        model.save(DATA_PATH + 'final_model.h5', overwrite=True)
        model.save_weights(DATA_PATH + 'final_weights.h5',overwrite=True)
    except:
        print "Error in saving model."

    print "Training complete...\n"

    return hist
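
Because this variant returns the Keras History object, the training curve can be inspected after the run; a small usage sketch (matplotlib is an assumption, not part of the example):

import matplotlib.pyplot as plt

hist = train_model(batch_size=32, epochs=10)
plt.plot(hist.history['loss'])   # per-epoch training loss recorded by fit_generator
plt.xlabel('epoch')
plt.ylabel('training loss')
plt.savefig('training_loss.png')
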
Example No. 7
def train_model(weight=None, batch_size=256, epochs=10):

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()

    if weight is not None:
        model.load_weights(weight)

    counter = 0
    file_name = DATA_PATH + 'weights-checkpoint.h5'

    #define callbacks
    checkpoint = ModelCheckpoint(file_name,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    tensor_board = TensorBoard(log_dir='./logs', write_graph=True)
    hist_lr = LR_hist()
    reduce_lr = LearningRateScheduler(step_decay)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.1,
                                   patience=16,
                                   verbose=1)
    callbacks_list = [
        checkpoint, tensor_board, hist_lr, reduce_lr, early_stopping
    ]

    hist = model.fit_generator(
        cg.data_generator_train(batch_size=batch_size),
        steps_per_epoch=cg.total_samples / batch_size,
        epochs=epochs,
        verbose=2,
        callbacks=callbacks_list,
        validation_data=cg.data_generator_val(batch_size=batch_size),
        validation_steps=cg.total_samples / (batch_size * 13.0))

    model.save(DATA_PATH + 'final_model.h5', overwrite=True)
    model.save_weights(DATA_PATH + 'final_weights.h5', overwrite=True)

    hist_file = DATA_PATH + '/hist_model.dat'
    with open(hist_file, 'wb') as f:
        pickle.dump(hist.history, f)

    print("training complete...\n")

    return model, hist, hist_lr
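
This example references step_decay and LR_hist without defining them. A minimal sketch of what they might look like, assuming a halving step schedule and a callback that records the learning rate (these bodies are assumptions, not the source's code):

import numpy as np
from keras import backend as K
from keras.callbacks import Callback

def step_decay(epoch):
    # Halve an assumed initial rate of 1e-3 every 10 epochs (illustrative values).
    return 0.001 * (0.5 ** np.floor(epoch / 10.0))

class LR_hist(Callback):
    """Records the optimizer's learning rate at the end of every epoch."""
    def on_train_begin(self, logs=None):
        self.lr = []

    def on_epoch_end(self, epoch, logs=None):
        self.lr.append(float(K.get_value(self.model.optimizer.lr)))
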
Example No. 8
def main(_):
    model = show_and_tell_model.ShowAndTellModel(FLAGS.model_path)
    vocabi = vocab.Vocabulary(FLAGS.vocab_file)
    filenames = _load_filenames()

    generator = caption_generator.CaptionGenerator(model, vocabi)

    for filename in filenames:
        with tf.gfile.GFile(filename, "rb") as f:
            image = f.read()
        captions = generator.beam_search(image)
        print("Captions for image %s:" % os.path.basename(filename))
        for i, caption in enumerate(captions):
            # Ignore begin and end tokens <S> and </S>.
            sentence = [vocabi.id_to_token(w) for w in caption.sentence[1:-1]]
            sentence = " ".join(sentence)
            print("  %d) %s (p=%f)" % (i, sentence, math.exp(caption.logprob)))
Example No. 9
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))

    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        images = []
        for filename in filenames:
            with tf.gfile.GFile(filename, "rb") as f:
                image = f.read()
                images.append(image)
        captions = generator.beam_search(sess, images, vocab)
        for i, image in enumerate(images):
            print("Captions for image %s:" % os.path.basename(filenames[i]))
            for j, caption in enumerate(captions[i]):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                print("  %d) %s (p=%f)" %
                      (j, sentence, math.exp(caption.logprob)))
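
These im2txt-style scripts are normally launched through tf.app.run(), which parses the FLAGS used above and then calls main; the usual footer (not shown in the snippet) would be:

if __name__ == "__main__":
    tf.app.run()
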
Example No. 10
def img_captions(file_inputs):
  # Build the inference graph.
  g = tf.Graph()
  with g.as_default():
    model = inference_wrapper.InferenceWrapper()
    restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                               file_inputs[0])
  g.finalize()

  # Create the vocabulary.
  vocab = vocabulary.Vocabulary(file_inputs[1])

  filenames = []
  for file_pattern in file_inputs[2].split(","):
    filenames.extend(tf.gfile.Glob(file_pattern))
  tf.logging.info("Running caption generation on %d files matching %s",
                  len(filenames), file_inputs[2])

  with tf.Session(graph=g) as sess:
    # Load the model from checkpoint.
    restore_fn(sess)

    # Prepare the caption generator. Here we are implicitly using the default
    # beam search parameters. See caption_generator.py for a description of the
    # available beam search parameters.
    generator = caption_generator.CaptionGenerator(model, vocab)

    caption_list = list()
    prob_list = list()
    for filename in filenames:
      with tf.gfile.GFile(filename, "rb") as f:
        image = f.read()
      captions, probs = generator.beam_search(sess, image)
      prob_list.append('['+", ".join(map(str, probs))+']')

      loc_cap_list = list()
      for i, caption in enumerate(captions):
        # Ignore begin and end words.
        sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence).split('<S>')[0]
        loc_cap_list.append([sentence, math.exp(caption.logprob)])
      caption_list.append(loc_cap_list)
  return prob_list, caption_list
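
file_inputs is indexed as checkpoint path (0), vocabulary file (1), and comma-separated image globs (2); a hypothetical call with placeholder file names:

prob_list, caption_list = img_captions([
    "model/train/model.ckpt-2000000",   # [0] checkpoint path
    "data/word_counts.txt",             # [1] vocabulary file
    "images/*.jpg",                     # [2] comma-separated file patterns
])
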
Example No. 11
def start_testing_images(model_path):
    #Load model
    detection_graph = load_frozen_model(model_path)
    vocab = vocabulary.Vocabulary()
    generator = caption_generator.CaptionGenerator(None, vocab)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            #print ([n.name for n in tf.get_default_graph().as_graph_def().node])
            #image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
            #input_feed = tf.placeholder(dtype=tf.int64,
            #    shape=[None],  # batch_size
            #    name="input_feed")
            #for op in detection_graph.get_operations():
            #    print(op.name)
            for image_path in TEST_IMAGE_PATHS:
                start = timeit.default_timer()
                image = tf.gfile.GFile(image_path, 'rb').read()
                img = Image.open(image_path)
                npimg = load_image_into_numpy_array(img)
                print(type(image))
                stop = timeit.default_timer()
                print("Time to encode image: ", stop - start)

                # Actual detection
                start = timeit.default_timer()
                captions = generator.beam_search(sess, image, img)
                stop = timeit.default_timer()
                print("Time to Generate captions: ", stop - start)

                #Caption printing...
                start = timeit.default_timer()
                for i, caption in enumerate(captions):
                    sentence = [
                        vocab.id_to_word(w) for w in caption.sentence[1:-1]
                    ]
                    sentence = " ".join(sentence)
                    print("  %d) %s (p=%f)" %
                          (i, sentence, math.exp(caption.logprob)))
                stop = timeit.default_timer()
                print("Time for Caption -> Sentence", stop - start)
Example No. 12
def train_model(weight=None, batch_size=256, epochs=10):

    cg = caption_generator.CaptionGenerator()
    model = cg.create_model()
    model.summary()
    if weight is not None:
        model.load_weights(weight)

    counter = 0
    # ModelCheckpoint can only substitute `epoch` and logged metrics into the
    # checkpoint filename, so no custom {count} placeholder is used.
    file_name = 'weights-improvement-{epoch:02d}.hdf5'
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint]
    
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=5, verbose=1, callbacks=callbacks_list)
    #model.fit_generator(cg.data_generator(batch_size=batch_size), epochs=1, verbose=1, callbacks=callbacks_list)
    counter = 1

    # Update the learning rate through the backend so the change reaches the
    # compiled training function (assumes `from keras import backend as K`).
    K.set_value(model.optimizer.lr, 0.008)
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=5, verbose=1, callbacks=callbacks_list)
    counter = 2
    K.set_value(model.optimizer.lr, 0.006)
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=5, verbose=1, callbacks=callbacks_list)
    
    counter = 4
    K.set_value(model.optimizer.lr, 0.004)
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=5, verbose=1, callbacks=callbacks_list)

    counter = 5
    K.set_value(model.optimizer.lr, 0.002)
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=5, verbose=1, callbacks=callbacks_list)

    counter = 6
    K.set_value(model.optimizer.lr, 0.001)
    model.fit_generator(cg.data_generator(batch_size=batch_size), steps_per_epoch=cg.total_samples/batch_size, epochs=45, verbose=1, callbacks=callbacks_list)

    try:
        model.save('/home/manish.singhal/Image-Captioning-master/caption_generator/Models/WholeModel.h5', overwrite=True)
        model.save_weights('/home/manish.singhal/Image-Captioning-master/caption_generator/Models/Weights.h5',overwrite=True)
    except:
        print ("Error in saving model.")
    print ("Training complete...\n")
Example No. 13
encoded_images = file['valid_set']
valid_list_file = "data/valid_list.txt"
train_step = conf.train_step
checkpoint_steps = conf.original_train_steps + (train_step -
                                                1) * conf.interval_train_steps

check_point_path = "train_log/{}.ckpt".format(checkpoint_steps)

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                           check_point_path)

sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(
    model, vocab, beam_size=1, use_ngram=conf.use_ngram_gen_result)

valid_list_file = open(valid_list_file, 'r')
valid_image_list = []
for line in valid_list_file.readlines():
    valid_image_list.append(line.strip().split()[0])
# output three optional sentences for each image, ranking by probability in decreasing order
# with open('/home/chengcheng/dataset/image_caption/inference/3/valid_caption_{}.txt'.format(checkpoint_steps), 'w') as f:

result_list = []

for index in range(1000):
    captions = generator.beam_search(sess, encoded_images[index])
    # if encoded_images[index] != valid_image_list[index]:
    #    print(encoded_images[index], valid_image_list[index])
    if index % 100 == 0:
Example No. 14
import cPickle as pickle
import caption_generator
import numpy as np
from keras.preprocessing import sequence
import nltk

cg = caption_generator.CaptionGenerator()


def process_caption(caption):
    caption_split = caption.split()
    processed_caption = caption_split[1:]
    try:
        end_index = processed_caption.index('<end>')
        processed_caption = processed_caption[:end_index]
    except ValueError:
        pass
    return " ".join([word for word in processed_caption])


def get_best_caption(captions):
    captions.sort(key=lambda l: l[1])
    best_caption = captions[-1][0]
    return " ".join([cg.index_word[index] for index in best_caption])


def get_all_captions(captions):
    final_captions = []
    captions.sort(key=lambda l: l[1])
    for caption in captions:
        text_caption = " ".join([cg.index_word[index] for index in caption[0]])
Example No. 15
import math

vocab = vocabulary.Vocabulary("data/dic.txt")
file = h5py.File("/home/chengcheng/dataset/image_caption/feat.hdf5", 'r')
encoded_images = file['valid_set']
valid_list_file = "/home/chengcheng/ImageCaption/data/valid_list.txt"
check_point_steps = 800000

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(
    configuration.ModelConfig(), "train_log/{}.ckpt".format(check_point_steps))

sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(model, vocab)

valid_list_file = open(valid_list_file, 'r')
valid_image_list = []
for line in valid_list_file.readlines():
    valid_image_list.append(line.strip().split()[0])
# output three optional sentences for each image, ranking by probability in decreasing order
with open('infer_result/6000_0_valid_caption_{}.txt'.format(check_point_steps),
          'w') as f:
    for index in range(1000):
        captions = generator.beam_search(sess, encoded_images[index])
        f.write(valid_image_list[index])
        # print("Captions for image {}".format(valid_image_list[index]))
        for i, caption in enumerate(captions):
            # Ignore begin and end words.
            sentence = [vocab.id_to_word(w) for w in caption.sentence[1:-1]]
Example No. 16
def main(_):
    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)

        ##################
        results = []
        results0 = []
        results1 = []
        results2 = []
        count = 1
        for filename in filenames:
            with tf.gfile.GFile(filename, "r") as f:
                image = f.read()
            captions = generator.beam_search(sess, image)
            image_name_full = os.path.basename(filename)
            print("Captions for %d/30000 image %s:" % (count, image_name_full))
            count = count + 1
            b = re.compile(r'\.jpg$')
            image_name = b.sub('', image_name_full)

            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = "".join(sentence)
                if i == 0:
                    results0.append({
                        "image_id": image_name,
                        "caption": sentence,
                    })

                if i == 1:
                    results1.append({
                        "image_id": image_name,
                        "caption": sentence,
                    })
                if i == 2:
                    results2.append({
                        "image_id": image_name,
                        "caption": sentence,
                    })
                results.append({
                    "image_id": image_name,
                    "caption": sentence,
                })
                print("  %d) %s (p=%f)" %
                      (i, sentence, math.exp(caption.logprob)))

        print("the length of results is:", len(results))
        print("the length of results is:", len(results0))
        print("the length of results is:", len(results1))
        print("the length of results is:", len(results2))

        #
        outfile = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results.json"
        outfile0 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results0.json"
        outfile1 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results1.json"
        outfile2 = "/media/han/6f586f18-792a-40fd-ada6-59702fb5dabc/wen/im2txt-aic/eval/data/val_results2.json"

        with io.open(outfile, 'w', encoding='utf-8') as fd:
            fd.write(
                unicode(
                    json.dumps(results,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=2,
                               separators=(',', ': '))))
        with io.open(outfile0, 'w', encoding='utf-8') as fd0:
            fd0.write(
                unicode(
                    json.dumps(results0,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=2,
                               separators=(',', ': '))))
        with io.open(outfile1, 'w', encoding='utf-8') as fd1:
            fd1.write(
                unicode(
                    json.dumps(results1,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=2,
                               separators=(',', ': '))))
        with io.open(outfile2, 'w', encoding='utf-8') as fd2:
            fd2.write(
                unicode(
                    json.dumps(results2,
                               ensure_ascii=False,
                               sort_keys=True,
                               indent=2,
                               separators=(',', ': '))))
Example No. 17
def caption_image():
    """API to caption images"""
    image_format = "not jpeg"

    st = current_time()
    # get beam_size
    beam_size = int(request.args.get("beam_size", "3"))
    # get max_caption_length
    max_caption_length = int(request.args.get("max_caption_length", "20"))
    # get image_data
    if request.method == 'POST':
        image_data = request.get_data()
    else:
        url = request.args.get("url")
        c_type, image_data = get_remote_file(url)
        if not image_data:
            return Response(status=400, response=jsonify(error="Could not HTTP GET %s" % url))
        if 'image/jpeg' in c_type:
            image_format = "jpeg"

    # use c_type to find whether image_format is jpeg or not
    # if jpeg, don't convert
    if image_format == "jpeg":
        jpg_image = image_data
    # if not jpeg
    else:
        # open the image from raw bytes
        image = Image.open(BytesIO(image_data))
        # convert the image to RGB format, otherwise will give errors when converting to jpeg, if the image isn't RGB
        rgb_image = image.convert("RGB")
        # convert the RGB image to jpeg
        image_bytes = BytesIO()
        rgb_image.save(image_bytes, format="jpeg", quality=95)
        jpg_image = image_bytes.getvalue()
        image_bytes.close()

    read_time = current_time() - st
    # restart counter
    st = current_time()

    generator = caption_generator.CaptionGenerator(app.model,
                                                   app.vocab,
                                                   beam_size=beam_size,
                                                   max_caption_length=max_caption_length)
    captions = generator.beam_search(app.sess, jpg_image)

    captioning_time = current_time() - st
    app.logger.info("Captioning time : %d" % captioning_time)

    array_captions = []
    for caption in captions:
        sentence = [app.vocab.id_to_word(w) for w in caption.sentence[1:-1]]
        sentence = " ".join(sentence)
        array_captions.append({
            'sentence': sentence,
            'confidence': math.exp(caption.logprob)
        })

    response = {
        'beam_size': beam_size,
        'max_caption_length': max_caption_length,
        'captions': array_captions,
        'time': {
            'read': read_time,
            'captioning': captioning_time,
            'units': 'ms'
        }
    }
    return Response(response=json.dumps(response), status=200, mimetype="application/json")
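
A hypothetical client call for the endpoint above (the route path and port are not shown in the snippet and are assumptions; requires the requests package):

import requests

with open("example.jpg", "rb") as f:
    resp = requests.post("http://localhost:5000/caption",
                         params={"beam_size": 3, "max_caption_length": 20},
                         data=f.read())
print(resp.json()["captions"])
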
Example No. 18
def main(_):
    #convert jpg image(s) into image representations using AlexNet:
    filenames = [
        os.path.join(image_dir, f) for f in [
            'overly-attached-girlfriend.jpg',
            'high-expectations-asian-father.jpg', 'foul-bachelor-frog.jpg',
            'stoner-stanley.jpg', 'y-u-no.jpg', 'willy-wonka.jpg',
            'futurama-fry.jpg', 'success-kid.jpg', 'one-does-not-simply.jpg',
            'bad-luck-brian.jpg', 'first-world-problems.jpg',
            'philosoraptor.jpg', 'what-if-i-told-you.jpg', 'TutorPP.jpg'
        ]
    ]
    print(filenames)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)
    #mean of imagenet dataset in BGR
    imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)

    #placeholder for input and dropout rate
    x_Alex = tf.placeholder(tf.float32, [1, 227, 227, 3])
    keep_prob_Alex = tf.placeholder(tf.float32)

    #create model with default config ( == no skip_layer and 1000 units in the last layer)
    modelAlex = AlexNet(x_Alex, keep_prob_Alex, 1000, [], ['fc7', 'fc8'],
                        512)  #maybe need to put fc8 in skip_layers

    #define activation of last layer as score
    score = modelAlex.fc6

    meme_embeddings = []
    with tf.Session() as sess:

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        # Load the pretrained weights into the model
        modelAlex.load_initial_weights(sess)

        for i, meme in enumerate(filenames):
            img = Image.open(meme)
            try:
                img.thumbnail((227, 227), Image.ANTIALIAS)
                #img = img.resize((227,227))
                #use img.thumbnail for square images, img.resize for non square
                assert np.shape(img) == (227, 227, 3)
            except AssertionError:
                img = img.resize((227, 227))
                print('sizing error')

            # Subtract the ImageNet mean
            img = img - imagenet_mean  #should probably change this

            # Reshape as needed to feed into model
            img = img.reshape((1, 227, 227, 3))

            meme_vector = sess.run(score,
                                   feed_dict={
                                       x_Alex: img,
                                       keep_prob_Alex: 1
                                   })  #[1,4096]
            meme_vector = np.reshape(meme_vector, [4096])
            assert np.shape(meme_vector) == (4096, )

            #now have np embeddings to feed for inference
            meme_embeddings.append(meme_vector)

    with open('Captions.txt', 'r') as f:
        data_captions = f.readlines()
    data_captions = [s.lower() for s in data_captions]

    # Build the inference graph.
    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                                   FLAGS.checkpoint_path)
    g.finalize()

    # Create the vocabulary.
    vocab = vocabulary.Vocabulary(FLAGS.vocab_file)

    #filenames = []
    #for file_pattern in FLAGS.input_files.split(","):
    #filenames.extend(tf.gfile.Glob(file_pattern))
    #tf.logging.info("Running caption generation on %d files matching %s",
    #len(filenames), FLAGS.input_files)
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        restore_fn(sess)

        # Prepare the caption generator. Here we are implicitly using the default
        # beam search parameters. See caption_generator.py for a description of the
        # available beam search parameters.
        generator = caption_generator.CaptionGenerator(model, vocab)
        num_in_data_total = 0
        num_captions = 0
        for i, meme in enumerate(meme_embeddings):
            #with tf.gfile.GFile(filename, "rb") as f:
            #image = f.read()
            captions = generator.beam_search(sess, meme)
            print("Captions for image %s:" % os.path.basename(filenames[i]))
            num_in_data = 0
            for i, caption in enumerate(captions):
                # Ignore begin and end words.
                sentence = [
                    vocab.id_to_word(w) for w in caption.sentence[1:-1]
                ]
                sentence = " ".join(sentence)
                in_data = 0
                if b_any(sentence in capt for capt in data_captions):
                    in_data = 1
                    num_in_data += 1
                    num_in_data_total += 1
                    num_captions += 1
                else:
                    num_captions += 1
                print("  %d) %s (p=%f) [in data = %d]" %
                      (i, sentence, math.exp(caption.logprob), in_data))
            print("number of captions in data = %d" % (num_in_data))
        print("(total number of captions in data = %d) percent in data = %f" %
              (num_in_data_total, (num_in_data_total / num_captions)))
Example No. 19
train_vector_file = "data/train_vector.txt"

train_step = conf.train_step
checkpoint_steps = conf.original_train_steps + (train_step -
                                                1) * conf.interval_train_steps

checkpoint_path = "train_log/{}.ckpt".format(checkpoint_steps)

model = inference_wrapper.InferenceWrapper()
restore_fn = model.build_graph_from_config(configuration.ModelConfig(),
                                           checkpoint_path)

sess = tf.InteractiveSession()
restore_fn(sess)

generator = caption_generator.CaptionGenerator(
    model, vocab, beam_size=conf.beam_size, use_ngram=conf.use_ngram_gen_label)

train_list_file = open(train_list_file, 'r')
train_image_list = []
for line in train_list_file.readlines():
    train_image_list.append(line.strip().split()[0])
# output three optional sentences for each image, ranking by probability in decreasing order
# with open('/home/chengcheng/dataset/image_caption/inference/3/train_caption_{}.txt'.format(check_point_steps), 'w') as f:

caption_vector_path = "train_log/{}_infer_train_vector.txt".format(
    checkpoint_steps)

label_file = open(caption_vector_path, 'w')

index = -1
for line in open(train_vector_file):