Example #1
def main(results):
    # restore config and model
    dir_output = results

    config_data = Config(dir_output + "data.json")
    config_vocab = Config(dir_output + "vocab.json")
    config_model = Config(dir_output + "model.json")

    vocab = Vocab(config_vocab)
    model = Img2SeqModel(config_model, dir_output, vocab)
    model.build_pred()
    model.restore_session(dir_output + "model.weights/")

    # load dataset
    test_set = DataGenerator(
        index_file=config_data.index_test,
        path_formulas=config_data.path_formulas_test,
        dir_images=config_data.dir_images_test,
        max_iter=config_data.max_iter,
        path_matching=config_data.path_matching_test,
        form_prepro=vocab.form_prepro)

    # use model to write predictions in files
    config_eval = Config({"dir_answers": dir_output + "formulas_test/",
                          "batch_size": 20})
    files, perplexity = model.write_prediction(config_eval, test_set)
    formula_ref, formula_hyp = files[0], files[1]

    # score the ref and prediction files
    scores = score_files(formula_ref, formula_hyp)
    scores["perplexity"] = perplexity
    msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in scores.items()])
    model.logger.info("- Test Txt: {}".format(msg))
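
A minimal sketch of how this example might be invoked; the `results/full/` path is hypothetical and only illustrates that `main` expects a results directory (with a trailing slash) containing data.json, vocab.json, model.json and model.weights/.

if __name__ == "__main__":
    # hypothetical output directory; point this at your own trained-model folder
    main("results/full/")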
Example #2
    def _run_evaluate(self, config, test_set):
        """Performs an epoch of evaluation

        Args:
            test_set: Dataset instance
            config: (Config) with batch_size and dir_answers

        Returns:
            scores: (dict) scores["acc"] = 0.85 for instance

        """
        files, perp = self.write_prediction(config, test_set)
        scores = score_files(files[0], files[1])
        scores["perplexity"] = perp

        return scores
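
For context, a rough sketch of how `_run_evaluate` could be driven from outside the class. The `evaluate` wrapper name is an assumption (it is not shown in the examples above); the logging format mirrors Example #1, and `self.logger` is assumed to exist as in Example #4.

    def evaluate(self, config, test_set):
        """Hypothetical public wrapper: runs _run_evaluate and logs the scores."""
        scores = self._run_evaluate(config, test_set)
        msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in scores.items()])
        self.logger.info("- Eval: {}".format(msg))
        return scores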
Example #3
    def _run_evaluate(self, config, test_set):
        """Performs an epoch of evaluation

        Args:
            test_set: Dataset instance
            config: (Config) with batch_size and dir_answers in it

        Returns:
            scores: (dict) scores["acc"] = 0.85 for instance

        """
        files, perp = self.write_prediction(config, test_set)
        scores = score_files(files[0], files[1])
        scores["perplexity"] = perp

        return scores
Example #4
    def _run_evaluate_epoch(self, config, test_set):
        """Performs an epoch of evaluation
        Args:
            test_set: Dataset instance
            config: (Config) with batch_size and dir_answers
        Returns:
            scores: (dict) scores["acc"] = 0.85 for instance
        """
        self.model.eval()
        self.encoder.eval()
        self.decoder.eval()
        # initialize containers of references and predictions
        if self._config.decoding == "greedy":
            refs, hyps = [], [[]]
        elif self._config.decoding == "beam_search":
            refs, hyps = [], [[] for i in range(self._config.beam_size)]
        with torch.no_grad():
            batch_size = config.batch_size
            nbatches = (len(test_set) + batch_size - 1) // batch_size
            prog = Progbar(nbatches)
            total_loss = 0.0  # accumulated evaluation loss, used for perplexity

            for i, (img, formula) in enumerate(minibatches(test_set, batch_size)):
                # Move to GPU, if available
                img = pad_batch_images_2(img)
                img = torch.FloatTensor(img)  # (N, W, H, C)
                formula, formula_length = pad_batch_formulas(formula, self._vocab.id_pad, self._vocab.id_end)
                img = img.permute(0, 3, 1, 2)  # (N, C, W, H)
                formula = torch.LongTensor(formula)  # (N, T)
                img = img.to(self.device)
                formula = formula.to(self.device)

                # Forward prop.
                imgs = self.encoder(img)
                formula_length = torch.LongTensor(formula_length).unsqueeze(1)  # (N, 1) true lengths
                scores, caps_sorted, decode_lengths, alphas, sort_ind = self.decoder(
                    imgs, formula, formula_length)

                # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
                targets = caps_sorted[:, 1:]

                # Greedy token predictions for each image, taken before packing flattens the batch
                pred_ids = scores.argmax(dim=-1)  # (N, T)

                # Remove timesteps that we didn't decode at, or are pads
                # pack_padded_sequence is an easy trick to do this
                scores = pack_padded_sequence(scores, decode_lengths, batch_first=True).data
                targets = pack_padded_sequence(targets, decode_lengths, batch_first=True).data

                # Calculate loss
                loss = self.criterion(scores, targets)

                alpha_c = 1.
                # Add doubly stochastic attention regularization
                loss += alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()

                loss_eval = loss.item()
                total_loss += loss_eval

                prog.update(i + 1, [("loss", loss_eval), ("perplexity", np.exp(loss_eval))])

                # Store the reference formula and the greedy hypothesis for each image,
                # trimmed to the decoded lengths, in the (sorted) order returned by the decoder
                for form, pred, length in zip(caps_sorted[:, 1:], pred_ids, decode_lengths):
                    refs.append(form[:length].tolist())
                    hyps[0].append(pred[:length].tolist())

            files = write_answers(refs, hyps, self._vocab.id_to_tok, config.dir_answers, self._vocab.id_end)
            scores = score_files(files[0], files[1])
            # perplexity estimate: exp of the average evaluation loss over all batches
            scores["perplexity"] = np.exp(total_loss / max(nbatches, 1))

        self.logger.info("- Evaluating: {}".format(prog.info))

        return scores