def evaluate_best_model(model_type, test_img_ids, test_img_id_2_feature_vector,
            vocabulary, train_captions):
    # initialize the model:
    if model_type == "GRU":
        config = GRU_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = GRU_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM":
        config = LSTM_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = LSTM_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM_attention":
        config = LSTM_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = LSTM_attention_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "GRU_attention":
        config = GRU_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                    dtype=np.float32)
        model = GRU_attention_Model(config, dummy_embeddings, mode="demo")

    # create the saver:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # restore the best model:
        if model_type == "GRU":
            saver.restore(sess, "models/GRUs/best_model/model")
        elif model_type == "LSTM":
            saver.restore(sess, "models/LSTMs/best_model/model")
        elif model_type == "LSTM_attention":
            saver.restore(sess, "models/LSTMs_attention/best_model/model")
        elif model_type == "GRU_attention":
            saver.restore(sess, "models/GRUs_attention/best_model/model")

        captions = []
        no_of_new_captions = 0
        no_of_old_captions = 0
        unique_words = []
        for img_id in test_img_ids:
            #if step % 100 == 0:
            #    print("generating captions on test: %d" % step)

            # generate a caption for the img:
            if model_type in ["LSTM", "GRU"]:
                img_id = int(img_id)
                img_features = test_img_id_2_feature_vector[img_id]
                img_caption = model.generate_img_caption(sess, img_features,
                            vocabulary)
            elif model_type in ["LSTM_attention", "GRU_attention"]:
                # get the img features from disk:
                img_features = cPickle.load(
                            open("coco/data/img_features_attention/%d" % img_id, "rb"))
                # generate a caption:
                img_caption, attention_maps = model.generate_img_caption(sess,
                            img_features, vocabulary)

            # save the generated caption together with the img id in the format
            # expected by the COCO evaluation script:
            caption_obj = {}
            caption_obj["image_id"] = img_id
            caption_obj["caption"] = img_caption
            captions.append(caption_obj)

            # check if the generated caption is new or is in train:
            if img_caption in train_captions:
                no_of_old_captions += 1
            else:
                no_of_new_captions += 1

            # check if there are any words in the caption that the model hasn't
            # generated before:
            for word in img_caption.split(" "):
                if word not in unique_words:
                    unique_words.append(word)

    # save the captions as a json file (will be used by the eval script):
    captions_file = "coco/data/test_captions.json"
    with open(captions_file, "w") as file:
        json.dump(captions, file, sort_keys=True, indent=4)

    # evaluate the generated captions:
    results_dict = evaluate_captions(captions_file)

    # compute the ratio of new captions:
    new_captions_ratio = float(no_of_new_captions)/float(no_of_new_captions +
                no_of_old_captions)

    # get the number of unique words that the model generated:
    vocab_size = len(unique_words)
    print("vocab size: %d" % vocab_size)

    results_dict["new_captions_ratio"] = new_captions_ratio
    results_dict["vocab_size"] = vocab_size

    return results_dict
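# Illustrative sketch only: one way evaluate_best_model could be driven for every
# model type, collecting the returned metrics. The pickle paths used below
# ("coco/data/vocabulary", "coco/data/test_img_ids",
# "coco/data/test_img_id_2_feature_vector", "coco/data/train_captions") and the
# helper name _example_evaluate_all_models are assumptions for illustration, not
# names confirmed elsewhere in this file. It relies on the module's existing imports
# (cPickle, tf) and on evaluate_best_model above.
def _example_evaluate_all_models():
    # load the shared test data once (assumed paths, see note above):
    vocabulary = cPickle.load(open("coco/data/vocabulary", "rb"))
    test_img_ids = cPickle.load(open("coco/data/test_img_ids", "rb"))
    test_img_id_2_feature_vector = cPickle.load(
                open("coco/data/test_img_id_2_feature_vector", "rb"))
    train_captions = cPickle.load(open("coco/data/train_captions", "rb"))

    results = {}
    for model_type in ["LSTM", "GRU", "LSTM_attention", "GRU_attention"]:
        # evaluate_best_model builds a fresh graph and Saver on every call, so
        # reset the default graph before constructing the next model:
        tf.reset_default_graph()
        results[model_type] = evaluate_best_model(model_type, test_img_ids,
                    test_img_id_2_feature_vector, vocabulary, train_captions)

    # print CIDEr, ratio of novel captions and generated vocab size per model:
    for model_type in results:
        metrics = results[model_type]
        print("%s: CIDEr: %f | new captions ratio: %f | vocab size: %d" % (
                    model_type, metrics["CIDEr"], metrics["new_captions_ratio"],
                    metrics["vocab_size"]))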
def main():
    # create a config object:
    config = LSTM_Config()
    # get the pretrained embeddings matrix:
    GloVe_embeddings = cPickle.load(open("coco/data/embeddings_matrix", "rb"))
    GloVe_embeddings = GloVe_embeddings.astype(np.float32)
    # create an LSTM model object:
    model = LSTM_Model(config, GloVe_embeddings)

    # initialize the list that will contain the loss for each epoch:
    loss_per_epoch = []
    # initialize the list that will contain all evaluation metrics (BLEU, CIDEr,
    # METEOR and ROUGE_L) for each epoch:
    eval_metrics_per_epoch = []

    # create a saver for saving all model variables/parameters:
    saver = tf.train.Saver(max_to_keep=model.config.max_no_of_epochs)

    with tf.Session() as sess:
        # initialize all variables/parameters:
        init = tf.global_variables_initializer()
        sess.run(init)
        #saver.restore(sess, "models/LSTMs/model_keep=0.50_batch=256_hidden_dim=200_embed_dim=300_layers=3/weights/model-50")

        for epoch in range(config.max_no_of_epochs):
            print "###########################"
            print "######## NEW EPOCH ########"
            print "###########################"
            print "epoch: %d/%d" % (epoch, config.max_no_of_epochs - 1)
            log("###########################")
            log("######## NEW EPOCH ########")
            log("###########################")
            log("epoch: %d/%d" % (epoch, config.max_no_of_epochs - 1))

            # run an epoch and get all batch losses:
            batch_losses = model.run_epoch(sess)

            # compute the epoch loss:
            epoch_loss = np.mean(batch_losses)
            # save the epoch loss:
            loss_per_epoch.append(epoch_loss)
            # save the epoch losses to disk:
            cPickle.dump(loss_per_epoch, open("%s/losses/loss_per_epoch"\
                        % model.config.model_dir, "w"))

            # generate captions on a (subset) of val:
            captions_file = model.generate_captions_on_val(sess, epoch,
                        model.vocabulary, val_set_size=1000)
            # evaluate the generated captions (compute eval metrics):
            eval_result_dict = evaluate_captions(captions_file)
            # save the epoch evaluation metrics:
            eval_metrics_per_epoch.append(eval_result_dict)
            # save the evaluation metrics for all epochs to disk:
            cPickle.dump(eval_metrics_per_epoch, open("%s/eval_results/metrics_per_epoch"\
                        % model.config.model_dir, "w"))

            if eval_result_dict["CIDEr"] > 0.92:
                # save the model weights to disk:
                saver.save(sess, "%s/weights/model" % model.config.model_dir,
                            global_step=epoch)

            print "epoch loss: %f | BLEU4: %f | CIDEr: %f" % (epoch_loss,
                        eval_result_dict["Bleu_4"], eval_result_dict["CIDEr"])
            log("epoch loss: %f | BLEU4: %f | CIDEr: %f" % (epoch_loss,
                        eval_result_dict["Bleu_4"], eval_result_dict["CIDEr"]))

    # plot the loss and the different evaluation metrics vs epoch:
    plot_performance(config.model_dir)
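# Illustrative sketch only: main() above pickles eval_metrics_per_epoch to
# "<model_dir>/eval_results/metrics_per_epoch". The helper below shows one way that
# file could be read back to find the epoch with the highest CIDEr score (e.g. when
# deciding which of the saved checkpoints to keep as the best model). The helper
# name _example_best_epoch_by_CIDEr is an assumption for illustration; it relies on
# the module's existing cPickle import.
def _example_best_epoch_by_CIDEr(model_dir):
    # load the list of per-epoch metric dicts written by main():
    metrics_per_epoch = cPickle.load(
                open("%s/eval_results/metrics_per_epoch" % model_dir, "rb"))

    # find the epoch with the highest CIDEr score:
    best_epoch = 0
    best_CIDEr = -1.0
    for epoch, metrics in enumerate(metrics_per_epoch):
        if metrics["CIDEr"] > best_CIDEr:
            best_CIDEr = metrics["CIDEr"]
            best_epoch = epoch

    print("best epoch: %d | CIDEr: %f" % (best_epoch, best_CIDEr))
    return best_epoch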
def main():
    # load the vocabulary: