Example #1
import numpy as np
import tensorflow as tf
import cPickle
import json
# (the model/config classes and evaluate_captions are defined elsewhere
# in the project)

def evaluate_best_model(model_type, test_img_ids, test_img_id_2_feature_vector, vocabulary, train_captions):
    # initialize the model:
    if model_type == "GRU":
        config = GRU_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = GRU_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM":
        config = LSTM_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = LSTM_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM_attention":
        config = LSTM_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = LSTM_attention_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "GRU_attention":
        config = GRU_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = GRU_attention_Model(config, dummy_embeddings, mode="demo")
    
    # create the saver:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # restore the best model:
        if model_type == "GRU":
            saver.restore(sess, "models/GRUs/best_model/model")
        elif model_type == "LSTM":
            saver.restore(sess, "models/LSTMs/best_model/model")
        elif model_type == "LSTM_attention":
            saver.restore(sess, "models/LSTMs_attention/best_model/model")
        elif model_type == "GRU_attention":
            saver.restore(sess, "models/GRUs_attention/best_model/model")

        captions = []
        no_of_new_captions = 0
        no_of_old_captions = 0
        unique_words = set()
        for img_id in test_img_ids:
            # make sure the img id is a plain int (needed both for the feature
            # lookup and for the json file expected by the COCO eval script):
            img_id = int(img_id)

            # generate a caption for the img:
            if model_type in ["LSTM", "GRU"]:
                img_features = test_img_id_2_feature_vector[img_id]
                img_caption = model.generate_img_caption(sess, img_features, vocabulary)
                img_caption = model.generate_img_caption(sess, img_features, vocabulary)
            elif model_type in ["LSTM_attention", "GRU_attention"]:
                # get the img features from disk:
                img_features = cPickle.load(
                            open("coco/data/img_features_attention/%d" % img_id, "rb"))
                # generate a caption:
                img_caption, attention_maps = model.generate_img_caption(sess,
                            img_features, vocabulary)

            # save the generated caption together with the img id in the format
            # expected by the COCO evaluation script:
            caption_obj = {}
            caption_obj["image_id"] = img_id
            caption_obj["caption"] = img_caption
            captions.append(caption_obj)

            # check if the generated caption is new or is in train:
            if img_caption in train_captions:
                no_of_old_captions += 1
            else:
                no_of_new_captions += 1

            # collect any words in the caption that the model hasn't
            # generated before:
            for word in img_caption.split(" "):
                unique_words.add(word)

    # save the captions as a json file (will be used by the eval script):
    captions_file = "coco/data/test_captions.json"
    with open(captions_file, "w") as f:
        json.dump(captions, f, sort_keys=True, indent=4)

    # evaluate the generated captions:
    results_dict = evaluate_captions(captions_file)

    # compute the ratio of new captions:
    new_captions_ratio = float(no_of_new_captions)/float(no_of_new_captions +
                no_of_old_captions)

    # get the number of unique words that the model generated:
    vocab_size = len(unique_words)
    print("vocab size is ")
    print(vocab_size)

    results_dict["new_captions_ratio"] = new_captions_ratio
    results_dict["vocab_size"] = vocab_size

    return results_dict
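
The evaluate_captions helper called above is not shown on this page. As a
rough sketch, it could be implemented on top of the official COCO caption
evaluation tools (pycocotools/pycocoevalcap); the annotations path below is a
hypothetical placeholder, and the project's actual helper may differ:

from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap

def evaluate_captions(captions_file):
    # load the ground-truth annotations (hypothetical path) and the
    # generated captions:
    coco = COCO("coco/annotations/captions_val2014.json")
    coco_res = coco.loadRes(captions_file)

    # restrict the evaluation to the imgs that actually have generated
    # captions:
    coco_eval = COCOEvalCap(coco, coco_res)
    coco_eval.params["image_id"] = coco_res.getImgIds()
    coco_eval.evaluate()

    # coco_eval.eval maps metric names ("Bleu_4", "METEOR", "ROUGE_L",
    # "CIDEr", ...) to their scores:
    return coco_eval.eval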
Example #2
import numpy as np
import tensorflow as tf
import cPickle
# (LSTM_Config, LSTM_Model, evaluate_captions, log and plot_performance are
# defined elsewhere in the project)

def main():
    # create a config object:
    config = LSTM_Config()
    # get the pretrained embeddings matrix:
    GloVe_embeddings = cPickle.load(open("coco/data/embeddings_matrix", "rb"))
    GloVe_embeddings = GloVe_embeddings.astype(np.float32)
    # create an LSTM model object:
    model = LSTM_Model(config, GloVe_embeddings)

    # initialize the list that will contain the loss for each epoch:
    loss_per_epoch = []
    # initialize the list that will contain all evaluation metrics (BLEU, CIDEr,
    # METEOR and ROUGE_L) for each epoch:
    eval_metrics_per_epoch = []

    # create a saver for saving all model variables/parameters:
    saver = tf.train.Saver(max_to_keep=model.config.max_no_of_epochs)

    with tf.Session() as sess:
        # initialize all variables/parameters:
        init = tf.global_variables_initializer()
        sess.run(init)

        #saver.restore(sess, "models/LSTMs/model_keep=0.50_batch=256_hidden_dim=200_embed_dim=300_layers=3/weights/model-50")

        for epoch in range(config.max_no_of_epochs):
            print "###########################"
            print "######## NEW EPOCH ########"
            print "###########################"
            print "epoch: %d/%d" % (epoch, config.max_no_of_epochs - 1)
            log("###########################")
            log("######## NEW EPOCH ########")
            log("###########################")
            log("epoch: %d/%d" % (epoch, config.max_no_of_epochs - 1))

            # run an epoch and get all batch losses:
            batch_losses = model.run_epoch(sess)

            # compute the epoch loss:
            epoch_loss = np.mean(batch_losses)
            # save the epoch loss:
            loss_per_epoch.append(epoch_loss)
            # save the epoch losses to disk:
            cPickle.dump(loss_per_epoch, open("%s/losses/loss_per_epoch"\
                        % model.config.model_dir, "wb"))

            # generate captions on a (subset) of val:
            captions_file = model.generate_captions_on_val(sess,
                                                           epoch,
                                                           model.vocabulary,
                                                           val_set_size=1000)
            # evaluate the generated captions (compute eval metrics):
            eval_result_dict = evaluate_captions(captions_file)
            # save the epoch evaluation metrics:
            eval_metrics_per_epoch.append(eval_result_dict)
            # save the evaluation metrics for all epochs to disk:
            cPickle.dump(eval_metrics_per_epoch, open("%s/eval_results/metrics_per_epoch"\
                        % model.config.model_dir, "wb"))

            if eval_result_dict["CIDEr"] > 0.92:
                # save the model weights to disk:
                saver.save(sess,
                           "%s/weights/model" % model.config.model_dir,
                           global_step=epoch)

            print "epoch loss: %f | BLEU4: %f  |  CIDEr: %f" % (
                epoch_loss, eval_result_dict["Bleu_4"],
                eval_result_dict["CIDEr"])
            log("epoch loss: %f | BLEU4: %f  |  CIDEr: %f" %
                (epoch_loss, eval_result_dict["Bleu_4"],
                 eval_result_dict["CIDEr"]))

    # plot the loss and the different evaluation metrics vs epoch:
    plot_performance(config.model_dir)
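
The log and plot_performance helpers are also not shown on this page. A
minimal sketch of what they might look like, assuming the losses/metrics are
pickled to the paths used in main() above and that plots are written to a
hypothetical plots/ subdirectory:

import cPickle
import matplotlib
matplotlib.use("Agg")  # render to files, no display needed
import matplotlib.pyplot as plt

def log(log_message):
    # hypothetical helper: append the message to a log file on disk
    with open("log.txt", "a") as f:
        f.write(log_message + "\n")

def plot_performance(model_dir):
    # load the per-epoch losses and eval metrics saved during training:
    loss_per_epoch = cPickle.load(
                open("%s/losses/loss_per_epoch" % model_dir, "rb"))
    metrics_per_epoch = cPickle.load(
                open("%s/eval_results/metrics_per_epoch" % model_dir, "rb"))

    # plot the training loss vs epoch:
    plt.figure()
    plt.plot(loss_per_epoch, "k^-")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.title("training loss per epoch")
    plt.savefig("%s/plots/loss_per_epoch.png" % model_dir)

    # plot each eval metric vs epoch:
    for metric in ["Bleu_4", "CIDEr", "METEOR", "ROUGE_L"]:
        metric_per_epoch = [m[metric] for m in metrics_per_epoch]
        plt.figure()
        plt.plot(metric_per_epoch, "k^-")
        plt.xlabel("epoch")
        plt.ylabel(metric)
        plt.title("%s per epoch" % metric)
        plt.savefig("%s/plots/%s_per_epoch.png" % (model_dir, metric))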
Example #3
            else:
                no_of_new_captions += 1

            # check if there are any words in the caption that the model hasn't
            # generated before:
            for word in img_caption.split(" "):
                if word not in unique_words:
                    unique_words.append(word)

    # save the captions as a json file (will be used by the eval script):
    captions_file = "coco/data/test_captions.json"
    with open(captions_file, "w") as f:
        json.dump(captions, f, sort_keys=True, indent=4)

    # evaluate the generated captions:
    results_dict = evaluate_captions(captions_file)

    # compute the ratio of new captions:
    new_captions_ratio = float(no_of_new_captions)/float(no_of_new_captions +
                no_of_old_captions)

    # get the number of unique words that the model generated:
    vocab_size = len(unique_words)

    results_dict["new_captions_ratio"] = new_captions_ratio
    results_dict["vocab_size"] = vocab_size

    return results_dict

def main():
    # load the vocabulary: