Example #1
    def evaluate_decision_maker(self):
        # Score the decision maker on the validation questions and log
        # per-question captioning metrics plus a weighted summary score.
        info = {}

        acc, scores = self.evaluate_with_questions()

        for i in range(len(acc)):
            info['mk-supp/val accuracy Q{}'.format(i)] = acc[i]
            info['mk-supp/CIDEr Q{}'.format(i)] = scores[i]['CIDEr'] * 100.0
            info['mk-supp/Bleu_4 Q{}'.format(i)] = scores[i]['Bleu_4'] * 100.0
            info['mk-supp/Bleu_3 Q{}'.format(i)] = scores[i]['Bleu_3'] * 100.0
            info['mk-supp/Bleu_2 Q{}'.format(i)] = scores[i]['Bleu_2'] * 100.0
            info['mk-supp/Bleu_1 Q{}'.format(i)] = scores[i]['Bleu_1'] * 100.0
            info['mk-supp/ROUGE_L Q{}'.format(i)] = scores[i]['ROUGE_L'] * 100.0
            info['mk-supp/METEOR Q{}'.format(i)] = scores[i]['METEOR'] * 100.0
            info['mk-main/weighted score Q{}'.format(i)] = \
                linear_reward_weighting(scores[i]['Bleu_1'], scores[i]['Bleu_2'], scores[i]['Bleu_3'],
                                        scores[i]['Bleu_4'], scores[i]['ROUGE_L'], scores[i]['METEOR'],
                                        scores[i]['CIDEr']) * 100.0

        model_score = linear_reward_weighting(
            scores[-1]['Bleu_1'], scores[-1]['Bleu_2'], scores[-1]['Bleu_3'],
            scores[-1]['Bleu_4'], scores[-1]['ROUGE_L'], scores[-1]['METEOR'],
            scores[-1]['CIDEr']) * 100.0

        self.std_logger.info(
            "Round {} | Epoch {}: | Weighted score: {}".format(
                self.chunk + 1, self.collection_epoch, model_score))

        util.epoch_logging(self.logger, info, self.collection_epoch)
        return model_score
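
Examples #1 and #3 call a `linear_reward_weighting` helper that is defined elsewhere in the codebase. A minimal sketch of a plausible implementation, assuming a plain weighted sum of the seven metrics; the uniform weight values here are purely illustrative, not the project's actual weights:

    # Hypothetical sketch of linear_reward_weighting: a weighted sum of
    # the seven COCO-caption metrics. Uniform weights are assumed here
    # purely for illustration.
    def linear_reward_weighting(bleu_1, bleu_2, bleu_3, bleu_4,
                                rouge_l, meteor, cider,
                                weights=(1.0,) * 7):
        metrics = (bleu_1, bleu_2, bleu_3, bleu_4, rouge_l, meteor, cider)
        return sum(w * m for w, m in zip(weights, metrics)) / sum(weights)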
Example #2
    def loop_chunk(self, repochs):
        # Toggle eval/train mode on the captioner and question generator
        # according to the run options, then run `repochs` epochs of
        # decision-maker training, logging and checkpointing each epoch.
        if self.opt.cap_eval:
            self.captioner.eval()
        else:
            self.captioner.train()

        if self.opt.quegen_eval:
            self.qgen.eval()
        else:
            self.qgen.train()

        for epoch in range(repochs):
            self.std_logger.info(
                "Training decision maker and collecting captions")

            self.train()

            info, distrs = self.data_collector.get_epoch_stats()
            info = {'mk-main/' + k: v for k, v in info.items()}
            util.epoch_logging(self.logger, info, self.collection_epoch)
            util.distr_logging(self.logger, distrs, self.collection_epoch)
            self.data_collector.reset_epoch_counters()

            model_score = self.evaluate_decision_maker()
            self.save_decision_maker(epoch, model_score)

            self.collection_epoch += 1
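
The `util.epoch_logging` and `util.distr_logging` helpers are also external to these snippets. A sketch of what they might do, assuming `self.logger` is a TensorBoard-style `SummaryWriter` (e.g. from tensorboardX); this is an assumption, not the project's actual `util` module:

    # Assumed implementations of the util logging helpers; `logger` is
    # taken to be a tensorboardX SummaryWriter, which may not match the
    # project's actual code.
    def epoch_logging(logger, info, epoch):
        # Write each scalar in `info` under its tag at the given step.
        for tag, value in info.items():
            logger.add_scalar(tag, value, epoch)

    def distr_logging(logger, distrs, epoch):
        # Write each collected distribution as a histogram.
        for tag, values in distrs.items():
            logger.add_histogram(tag, values, epoch)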
Example #3
    def evaluate_captioner(self):
        # Validate the captioner: compute loss/accuracy, score with the
        # COCO-caption metrics, and return the weighted summary score.
        print("Validating captioner")

        # compute loss, word-for-word accuracy, and coco-caption metrics
        val_loss, val_acc, val_pos_acc, val_pos_loss = \
            self.validate_captioner()
        scores = eval_coco(self.captioner, self.eval_loader, self.opt.run_name,
                           self.result_path, self.opt)
        weighted_score = linear_reward_weighting(
            scores['Bleu_1'], scores['Bleu_2'], scores['Bleu_3'],
            scores['Bleu_4'], scores['ROUGE_L'], scores['METEOR'],
            scores['CIDEr']) * 100.0
        model_score = weighted_score

        info = {
            'val loss': val_loss,
            'val accuracy': val_acc,
            'val pos accuracy': val_pos_acc,
            'val pos loss': val_pos_loss,
            'eval cider': scores['CIDEr'] * 100.0,
            'eval bleu 4': scores['Bleu_4'] * 100.0,
            'eval bleu 3': scores['Bleu_3'] * 100.0,
            'eval bleu 2': scores['Bleu_2'] * 100.0,
            'eval bleu 1': scores['Bleu_1'] * 100.0,
            'eval rouge L': scores['ROUGE_L'] * 100.0,
            'eval meteor': scores['METEOR'] * 100.0,
            'eval weighted score': weighted_score
        }

        self.std_logger.info(str(info))
        util.epoch_logging(self.logger, info,
                           self.chunk * self.opt.cap_epochs + self.cap_epoch)

        return model_score
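
`eval_coco` is likewise defined outside the snippet; the keys of its `scores` dict match the standard pycocoevalcap metric names. A sketch of the scoring step alone, assuming the reference (`gts`) and generated (`res`) captions have already been gathered into the id -> list-of-strings dicts that pycocoevalcap expects:

    # Scoring-only sketch using the standard pycocoevalcap scorers; the
    # real eval_coco also runs the captioner over eval_loader and writes
    # results to result_path.
    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.rouge.rouge import Rouge
    from pycocoevalcap.meteor.meteor import Meteor
    from pycocoevalcap.cider.cider import Cider

    def coco_metrics(gts, res):
        scores = {}
        bleu, _ = Bleu(4).compute_score(gts, res)  # list: Bleu_1..Bleu_4
        for n, s in enumerate(bleu, start=1):
            scores['Bleu_{}'.format(n)] = s
        scores['ROUGE_L'], _ = Rouge().compute_score(gts, res)
        scores['METEOR'], _ = Meteor().compute_score(gts, res)
        scores['CIDEr'], _ = Cider().compute_score(gts, res)
        return scores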
Example #4
    def loop_chunk(self, repochs):
        # Train for `repochs` epochs, logging the per-epoch statistics
        # and distributions gathered by the data collector.
        for epoch in range(repochs):
            self.train()

            info, distrs = self.data_collector.get_epoch_stats()
            info = {'mk-main/' + k: v for k, v in info.items()}
            util.epoch_logging(self.logger, info, self.collection_epoch)
            util.distr_logging(self.logger, distrs, self.collection_epoch)
            self.data_collector.reset_epoch_counters()

            self.collection_epoch += 1

    def evaluate(self, epoch):

        samples, [val_loss, val_acc] = self.validate()
        # TODO: implement VQA2.0 scoring: score = eval_vqa(samples)

        info = {'val loss': val_loss, 'val accuracy': val_acc}

        model_score = val_acc

        self.std_logger.info(str(info))
        util.epoch_logging(self.logger, info, epoch)

        return model_score

    def evaluate(self, epoch):

        val_loss, val_acc, val_greedy_correct = self.validate()
        model_score = val_greedy_correct

        info = {
            'val loss': val_loss,
            'val accuracy': val_acc,
            'val correct answers': val_greedy_correct
        }

        self.std_logger.info(str(info))

        util.epoch_logging(self.logger, info, epoch)
        # util.parameter_logging(self.logger, self.model, epoch)

        return model_score
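
All four examples return `model_score` to their caller, and Example #2 passes it to `save_decision_maker`, suggesting a save-on-improvement checkpoint policy. A generic sketch of that pattern, assuming a PyTorch model; the helper name and signature are hypothetical:

    import torch

    # Hypothetical helper illustrating the checkpoint policy implied by
    # save_decision_maker(epoch, model_score): persist the weights only
    # when the evaluation score improves.
    def save_if_best(model, model_score, best_score, path):
        if model_score > best_score:
            torch.save(model.state_dict(), path)
            return model_score
        return best_score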