    def test_evaluate_2_gt_boxes_incorrect_confidence(self):
        """
        Test case with two ground-truth boxes where a single prediction matches one of them with a less-than-perfect confidence score
        :return:
        """
        # Arrange

        sut = MAPEvaluator()

        target = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4], [11, 12, 13, 14]]).float(),
            "labels": torch.tensor([1, 1]),
            "iscrowd": torch.tensor([0, 0])
        }]

        # Single prediction matching the first ground-truth box [1, 2, 3, 4]
        predicted = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "scores": torch.tensor([0.8])
        }]
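        # Only one of the two ground-truth boxes is detected, so recall is capped at 0.5 and the expected mAP is 0.5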
        expected_map = .5

        # Act
        actual = sut(target, predicted)

        # Assert
        self.assertEqual(round(expected_map, 2), round(actual, 2))

    def test_evaluate_single_full_match(self):
        """
        Test the simplest case: a single prediction that exactly matches the single ground-truth box
        :return:
        """
        # Arrange

        sut = MAPEvaluator()

        target = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "iscrowd": torch.tensor([0])
        }]

        predicted = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "scores": torch.tensor([1.0])
        }]
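        # The prediction matches the ground truth exactly with full confidence, so precision and recall are both 1.0 and the expected mAP is 1.0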
        expected_map_score = 1.0

        # Act
        actual = sut(target, predicted)

        # Assert
        self.assertEqual(expected_map_score, round(actual, 2))

    def get(self, train_dataset):
        """
        Build and return a TrainPipeline wired up with the trainer, evaluator, model and optimiser
        """
        accumulation_steps = int(
            self._get_value(self.additional_args, "accumulation_steps", "1"))
        self.logger.info(
            "Using accumulation steps {}".format(accumulation_steps))
        evaluator = MAPEvaluator(
            max_detections_per_image=train_dataset.max_detections_per_image)
        trainer = Train(patience_epochs=self.patience_epochs,
                        early_stopping=self.early_stopping,
                        epochs=self.epochs,
                        evaluator=evaluator,
                        accumulation_steps=accumulation_steps)

        # Model
        self.logger.info("Using model {}".format(self.model_factory_name))
        model_factory = ModelFactoryServiceLocator().get_factory(
            self.model_factory_name)
        model = model_factory.get_model(num_classes=train_dataset.num_classes)

        # If checkpoint file is available, load from checkpoint
        if self.checkpoint_dir is not None:
            model_files = list(
                glob.glob("{}/*.pth".format(self.checkpoint_dir)))
            if len(model_files) > 0:
                model_file = model_files[0]
                self.logger.info(
                    "Loading checkpoint {} , found {} checkpoint files".format(
                        model_file, len(model_files)))
                model = model_factory.load_model(
                    model_file, num_classes=train_dataset.num_classes)

        # TODO: Enable multi-GPU training properly; nn.DataParallel doesn't really work here yet
        if torch.cuda.device_count() > 1:
            self.logger.info(
                "Using nn.DataParallel for multi-GPU training. Currently not working reliably")
            model = nn.DataParallel(model)
            # Scale the batch size by the number of GPUs so each device keeps the original per-device batch size
            self.batch_size = self.batch_size * torch.cuda.device_count()
        self.logger.info("Using model {}".format(type(model)))

        # Define optimiser
        learning_rate = float(
            self._get_value(self.additional_args, "learning_rate", ".0001"))
        self.logger.info("Using learning_rate {}".format(learning_rate))

        # weight_decay = float(self._get_value(self.additional_args, "weight_decay", "5e-5"))
        # momentum = float(self._get_value(self.additional_args, "momentum", ".9"))
        # optimiser = SGD(lr=learning_rate, params=model.parameters(), momentum=momentum, weight_decay=weight_decay)
        optimiser = Adam(lr=learning_rate, params=model.parameters())
        self.logger.info("Using optimiser {}".format(type(optimiser)))

        # Kick off training pipeline
        train_pipeline = TrainPipeline(batch_size=self.batch_size,
                                       optimiser=optimiser,
                                       trainer=trainer,
                                       num_workers=self.num_workers,
                                       model=model)

        return train_pipeline
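
    # The "accumulation_steps" value read in get() is a standard gradient-accumulation
    # knob: gradients from several small batches are summed before each optimiser step,
    # emulating a larger effective batch. A minimal, self-contained PyTorch sketch of
    # the idea follows; it is an illustrative assumption, not this project's Train
    # class, and every name in it is made up for the example.
    def _gradient_accumulation_sketch(accumulation_steps=4):
        import torch
        from torch import nn
        from torch.optim import Adam

        model = nn.Linear(8, 1)
        optimiser = Adam(model.parameters(), lr=1e-4)
        loss_fn = nn.MSELoss()
        batches = [(torch.randn(2, 8), torch.randn(2, 1)) for _ in range(8)]

        optimiser.zero_grad()
        for step, (x, y) in enumerate(batches, start=1):
            # Scale the loss so the accumulated gradient matches a single large batch
            loss = loss_fn(model(x), y) / accumulation_steps
            loss.backward()
            if step % accumulation_steps == 0:
                optimiser.step()
                optimiser.zero_grad()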

    def test_evaluate_2_images(self):
        """
        Test case with two input images, where only the first image's ground-truth box is matched
        :return:
        """
        # Arrange

        sut = MAPEvaluator()

        target = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "area": torch.tensor([1.0]),
            "iscrowd": torch.tensor([0])
        }, {
            "image_id": torch.tensor(2),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "area": torch.tensor([1.0]),
            "iscrowd": torch.tensor([0])
        }]

        predicted = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]]).float(),
            "labels": torch.tensor([1, 1]),
            "scores": torch.tensor([1.0, 1.0])
        }, {
            "image_id": torch.tensor(2),
            "boxes": torch.tensor([[7, 8, 9, 10]]).float(),
            "labels": torch.tensor([1]),
            "scores": torch.tensor([1.0])
        }]
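        # Across both images only one of the two ground-truth boxes is matched (the extra predictions overlap nothing), so the expected mAP is 0.5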
        expected_map = .5

        # Act
        actual = sut(target, predicted)

        # Assert
        self.assertEqual(expected_map, round(actual, 2))

    def test_evaluate_2_p_boxes_correct_confidence(self):
        """
        Test case where two boxes are predicted for one ground-truth box; only one matches, and the matching box has the higher confidence score
        :return:
        """
        # Arrange

        sut = MAPEvaluator()

        target = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "area": torch.tensor([1.0]),
            "iscrowd": torch.tensor([0])
        }]

        # Two predictions for the single ground-truth box; only [1, 2, 3, 4] matches and it carries the higher score
        predicted = [{
            "image_id": torch.tensor(1),
            "boxes": torch.tensor([[5, 6, 7, 8], [1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1, 1]),
            "scores": torch.tensor([0.5, 0.7])
        }]
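        # The matching box is ranked first by its higher score, so precision and recall both reach 1.0 and the expected mAP stays 1.0 despite the extra false positive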
        expected_map = 1.0

        # Act
        actual = sut(target, predicted)

        # Assert
        self.assertEqual(round(expected_map, 2), round(actual, 2))
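
# A minimal, self-contained sketch (illustrative only, not MAPEvaluator's actual
# internals) of the IoU check that drives the expectations in the tests above:
# a prediction can only count as a true positive when its IoU with a ground-truth
# box clears the threshold, which is why a box such as [5, 6, 7, 8] never scores
# against a ground truth of [1, 2, 3, 4].
def _iou_sketch(box_a, box_b):
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h
    union = ((ax2 - ax1) * (ay2 - ay1)
             + (bx2 - bx1) * (by2 - by1)
             - intersection)
    return intersection / union if union > 0 else 0.0

# _iou_sketch([1, 2, 3, 4], [1, 2, 3, 4]) == 1.0
# _iou_sketch([1, 2, 3, 4], [5, 6, 7, 8]) == 0.0
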
def evaluateDataset(model, sess, tokenizer, vectorizer, ds, previous_losses=None):
    """
    Run the evaluator suite over dataset `ds`, append one row of scores to the
    scoreEvolution file for that dataset and return (MAP, BLEU, MEGA_MAP_SCORES).
    """
    embedding_vectorizer = EmbeddingLookup(
        "/home/martin/data/qatarliving/embeddings/qatarliving_qc_size100_win10_mincnt5_rpl_skip1_phrFalse_2016_02_23.word2vec.bin")
    seq2seq_embedding_vectorizer = Seq2SeqEmbeddingLookup(sess)
    additional_vectorizers = [
        {'vectorizer': vectorizer,
         'label': 'tfidf-cosine'
         },
        {
            'vectorizer': embedding_vectorizer,
            'label': 'embeddings'
        }
    ]
    from evaluators.bleu_single_evaluator import BLEUSingleEvaluator
    evaluators = [BLEUEvaluator(),  # 0
                  MAPEvaluator(),  # 1
                  PersisterEvaluator(
                      os.path.join(FLAGS.train_dir, 'responseEvolution-%s-%s' % (ds, model.global_step.eval()))),  # 2
                  VocabularyEvaluator(),  # 3
                  MAPEvaluatorSummed(),  # 4
                  LengthEvaluator(),  # 5
                  TTREvaluator(),  # 6
                  MegaMAPEvaluator(additional_vectorizers),  # 7
                  BLEUSingleEvaluator()  # 8
                  ]
    visitDatasetParameterized(sess, model, tokenizer, vectorizer, ds, evaluators)
    MAP = evaluators[1].results()
    bleu_results = evaluators[0].results()
    BLEU = bleu_results['BLEU']
    BLEU_ALL = bleu_results['BLEU_ALL']
    MAP_BLEU = evaluators[8].results()['MAP-BLEU']
    meanAvgBLEU = evaluators[8].results()['meanAvgBLEU']
    # The persister evaluator saves to a different file, so just call results()
    evaluators[2].results()
    vocab_eval = evaluators[3].results()
    MAP_SUMMED = evaluators[4].results()
    LENGTH = evaluators[5].results()
    TTR = evaluators[6].results()
    MEGA_MAP_SCORES = evaluators[7].results()
    score_path = os.path.join(FLAGS.train_dir, 'scoreEvolution-%s' % ds)
    if not os.path.isfile(score_path):
        with open(score_path, 'w') as out:
            metrics = ["Global step", "Training Perplexity",
                 "MAP",
                 "MAP_SUMMED",
                 "MAP-tfidf",
                 "MAP-tfidf_SUMMED",
                 "MAP-embeddings",
                 "MAP-embeddings_SUMMED",
                 "MAP-bm25",
                 "MAP-bm25_SUMMED",
                 "MAP_AVG",
                 "MAP_BLEU_SUMMED",
                 "MAP-BLEU",
                 "meanAvgBLEU",  "BLEU_POS", "BLEU_ALL", "Vocab size",
                 "Target Vocab Size", "Intersection Vocab size", "LENGTH", "TTR"]
            out.write("\t".join(metrics) + "\n")
    with open(score_path, 'a') as out:
        out.write("\t".join([
            str(model.global_step.eval()),
            str(previous_losses[-1]) if previous_losses else 'n/a',
            str(MAP),
            str(MAP_SUMMED),
            str(MEGA_MAP_SCORES['tfidf-cosine']),
            str(MEGA_MAP_SCORES['tfidf-cosine_SUMMED']),
            str(MEGA_MAP_SCORES['embeddings']),
            str(MEGA_MAP_SCORES['embeddings_SUMMED']),
            str(MEGA_MAP_SCORES['bm25']),
            str(MEGA_MAP_SCORES['bm25_SUMMED']),
            str(MEGA_MAP_SCORES['MAP_AVG']),
            str(MEGA_MAP_SCORES['bleu_map_SUMMED']),
            str(MAP_BLEU), str(meanAvgBLEU),
            str(BLEU), str(BLEU_ALL),
            str(vocab_eval[0][1]),
            str(vocab_eval[1][1]),
            str(vocab_eval[2][1]),
            str(LENGTH),
            str(TTR)
        ]) + "\n")
    return MAP, BLEU, MEGA_MAP_SCORES
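
# A minimal sketch (an assumption, not part of this code base) of how the
# tab-separated scoreEvolution file written above could be read back, e.g. to
# plot score curves over training; the path argument is hypothetical.
import csv

def read_score_evolution(path):
    with open(path) as handle:
        # The first line holds the metric names, so each row maps metric -> value
        return list(csv.DictReader(handle, delimiter="\t"))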