def evaluate_decision_maker(self):
    """Evaluate the decision maker on the validation questions and log metrics.

    Logs, per question index i: validation accuracy, the seven COCO-caption
    metrics (scaled to percentages), and their linearly weighted combination.
    Returns the weighted score of the last entry in `scores` as the model score.
    """
    acc, scores = self.evaluate_with_questions()
    # Order matters: util.epoch_logging receives the dict in insertion order.
    metric_names = ('CIDEr', 'Bleu_4', 'Bleu_3', 'Bleu_2', 'Bleu_1',
                    'ROUGE_L', 'METEOR')
    info = {}
    for i, question_acc in enumerate(acc):
        per_q = scores[i]
        info['mk-supp/val accuracy Q{}'.format(i)] = question_acc
        for metric in metric_names:
            info['mk-supp/{} Q{}'.format(metric, i)] = per_q[metric] * 100.0
        info['mk-main/weighted score Q{}'.format(i)] = linear_reward_weighting(
            per_q['Bleu_1'], per_q['Bleu_2'], per_q['Bleu_3'], per_q['Bleu_4'],
            per_q['ROUGE_L'], per_q['METEOR'], per_q['CIDEr']) * 100.0
    # Model score comes from the final question's metrics.
    last = scores[-1]
    model_score = linear_reward_weighting(
        last['Bleu_1'], last['Bleu_2'], last['Bleu_3'], last['Bleu_4'],
        last['ROUGE_L'], last['METEOR'], last['CIDEr']) * 100.0
    self.std_logger.info(
        "Round {} | Epoch {}: | Weighted score: {}".format(
            self.chunk + 1, self.collection_epoch, model_score))
    util.epoch_logging(self.logger, info, self.collection_epoch)
    return model_score
def loop_chunk(self, repochs):
    """Run `repochs` epochs of decision-maker training and caption collection.

    Before looping, puts the captioner and question generator into eval or
    train mode according to `self.opt`. Each epoch: trains, logs collected
    epoch statistics and distributions, resets the collector's counters,
    evaluates and checkpoints the decision maker, then advances
    `self.collection_epoch`.
    """
    # Mode switches are driven by options so either sub-model can be frozen.
    if self.opt.cap_eval:
        self.captioner.eval()
    else:
        self.captioner.train()
    if self.opt.quegen_eval:
        self.qgen.eval()
    else:
        self.qgen.train()
    for epoch in range(repochs):
        self.std_logger.info(
            "Training decision maker and collecting captions")
        self.train()
        info, distrs = self.data_collector.get_epoch_stats()
        # Fix: .items() instead of the Python-2-only .iteritems(), so this
        # also runs under Python 3; behavior is identical here.
        info = {'mk-main/' + k: v for k, v in info.items()}
        util.epoch_logging(self.logger, info, self.collection_epoch)
        util.distr_logging(self.logger, distrs, self.collection_epoch)
        self.data_collector.reset_epoch_counters()
        model_score = self.evaluate_decision_maker()
        self.save_decision_maker(epoch, model_score)
        self.collection_epoch += 1
def evaluate_captioner(self):
    """Validate the captioner and log loss, accuracy, and COCO metrics.

    Computes validation loss/accuracy figures, runs COCO-caption evaluation,
    and logs everything (metrics scaled to percentages) under the current
    captioner epoch. Returns the linearly weighted metric combination.
    """
    print("Validating captioner")
    # compute loss, word-for-word accuracy, and coco-caption metrics
    val_loss, val_acc, val_pos_acc, val_pos_loss = self.validate_captioner()
    scores = eval_coco(self.captioner, self.eval_loader, self.opt.run_name,
                       self.result_path, self.opt)
    weighted_score = linear_reward_weighting(
        scores['Bleu_1'], scores['Bleu_2'], scores['Bleu_3'],
        scores['Bleu_4'], scores['ROUGE_L'], scores['METEOR'],
        scores['CIDEr']) * 100.0
    # Insertion order mirrors the original logging layout.
    info = {
        'val loss': val_loss,
        'val accuracy': val_acc,
        'val pos accuracy': val_pos_acc,
        'val pos loss': val_pos_loss,
    }
    for metric, label in (('CIDEr', 'eval cider'),
                          ('Bleu_4', 'eval bleu 4'),
                          ('Bleu_3', 'eval bleu 3'),
                          ('Bleu_2', 'eval bleu 2'),
                          ('Bleu_1', 'eval bleu 1'),
                          ('ROUGE_L', 'eval rouge L'),
                          ('METEOR', 'eval meteor')):
        info[label] = scores[metric] * 100.0
    info['eval weighted score'] = weighted_score
    self.std_logger.info(str(info))
    util.epoch_logging(self.logger, info,
                       self.chunk * self.opt.cap_epochs + self.cap_epoch)
    return weighted_score
def loop_chunk(self, repochs):
    """Run `repochs` collection epochs: train, log stats, reset counters.

    Each epoch trains, fetches the collector's epoch statistics and
    distributions, logs them under the current collection epoch, resets the
    counters, and advances `self.collection_epoch`.
    """
    for epoch in range(repochs):
        self.train()
        info, distrs = self.data_collector.get_epoch_stats()
        # Fix: .items() instead of the Python-2-only .iteritems(), so this
        # also runs under Python 3; behavior is identical here.
        info = {'mk-main/' + k: v for k, v in info.items()}
        util.epoch_logging(self.logger, info, self.collection_epoch)
        util.distr_logging(self.logger, distrs, self.collection_epoch)
        self.data_collector.reset_epoch_counters()
        self.collection_epoch += 1
def evaluate(self, epoch):
    """Validate the model, log loss/accuracy, and return validation accuracy.

    Runs validation, calls the VQA evaluator on the collected samples (its
    result is not yet used — see TODO), logs the metrics for `epoch`, and
    returns validation accuracy as the model score.
    """
    samples, [val_loss, val_acc] = self.validate()
    # TODO: implement VQA2.0 scoring
    # Fix: dropped the unused `score =` binding; the call is kept in case
    # eval_vqa has side effects (presumably it writes/validates results —
    # verify before removing entirely).
    eval_vqa(samples)
    info = {'val loss': val_loss, 'val accuracy': val_acc}
    model_score = val_acc
    self.std_logger.info(str(info))
    util.epoch_logging(self.logger, info, epoch)
    return model_score
def evaluate(self, epoch):
    """Validate the model and log loss, accuracy, and greedy-correct counts.

    Returns the number of greedily correct answers as the model score.
    """
    val_loss, val_acc, val_greedy_correct = self.validate()
    info = {
        'val loss': val_loss,
        'val accuracy': val_acc,
        'val correct answers': val_greedy_correct,
    }
    self.std_logger.info(str(info))
    util.epoch_logging(self.logger, info, epoch)
    # util.parameter_logging(self.logger, self.model, epoch)
    return val_greedy_correct