Example #1
    def test_mmf_loss(self):
        get_loss_class_mock = MagicMock(side_effect=build_loss_side_effect())
        registry.get_loss_class = get_loss_class_mock
        # Test if MMFLoss accepts empty parameters
        self.assertRaises(ValueError, losses.MMFLoss)
        self.assertEqual(
            losses.MMFLoss({"type": "cross_entropy"}).name, "cross_entropy")
        self.assertEqual(losses.MMFLoss("cross_entropy").name, "cross_entropy")
        self.assertRaises(AssertionError, losses.MMFLoss, [])
        # Multi requires dict
        self.assertRaises(AssertionError, losses.MMFLoss, "multi")

        cross_entropy = losses.MMFLoss("cross_entropy")
        cross_entropy_from_dict = losses.MMFLoss({"type": "cross_entropy"})
        sample_list = SampleList()
        sample_list.dataset_type = "val"
        sample_list.dataset_name = "vqa2"

        output = cross_entropy(sample_list, {})
        output_from_dict = cross_entropy_from_dict(sample_list, {})

        self.assertEqual(output, {"val/vqa2/cross_entropy": torch.tensor(1.0)})
        self.assertEqual(output_from_dict, output)

        get_loss_class_mock.side_effect = build_loss_side_effect(1.0)
        output = cross_entropy(sample_list, {})

        self.assertEqual(output, {"val/vqa2/cross_entropy": torch.tensor(1.0)})
        self.assertEqual(output_from_dict, output)

        self.assertTrue(get_loss_class_mock.called)
        self.assertEqual(get_loss_class_mock.call_count, 5)
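The `val/vqa2/cross_entropy` key asserted above comes from MMFLoss prefixing each loss with the sample list's dataset type and dataset name. A minimal sketch of that key scheme, using only what the assertions show:

def loss_key(dataset_type, dataset_name, loss_name):
    # MMFLoss-style output key: "<dataset_type>/<dataset_name>/<loss_name>"
    return f"{dataset_type}/{dataset_name}/{loss_name}"

assert loss_key("val", "vqa2", "cross_entropy") == "val/vqa2/cross_entropy"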
Example #2
    def _load_objects(self, idx):
        image_info = self._get_image_info(idx)
        image_height = image_info["height"]
        image_width = image_info["width"]
        object_map = {}
        objects = []

        for obj in image_info["objects"]:
            obj["synsets"] = self.synset_processor({"tokens":
                                                    obj["synsets"]})["text"]
            obj["names"] = self.name_processor({"tokens":
                                                obj["names"]})["text"]
            obj["height"] = obj["h"] / image_height
            obj.pop("h")
            obj["width"] = obj["w"] / image_width
            obj.pop("w")
            obj["y"] /= image_height
            obj["x"] /= image_width
            obj["attributes"] = self.attribute_processor(
                {"tokens": obj["attributes"]})["text"]
            obj = Sample(obj)
            object_map[obj["object_id"]] = obj
            objects.append(obj)
        objects = SampleList(objects)

        return objects, object_map
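The loop above rescales each object's pixel-space box to fractions of the image size. A standalone sketch of that normalization, with made-up numbers:

def normalize_box(obj, image_height, image_width):
    # Convert pixel-space h/w/x/y into image-relative fractions,
    # renaming "h"/"w" to "height"/"width" as _load_objects does.
    obj["height"] = obj.pop("h") / image_height
    obj["width"] = obj.pop("w") / image_width
    obj["y"] /= image_height
    obj["x"] /= image_width
    return obj

box = normalize_box({"x": 50, "y": 20, "w": 100, "h": 40},
                    image_height=200, image_width=400)
assert box == {"x": 0.125, "y": 0.1, "width": 0.25, "height": 0.2}

Example #3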
    def test_beam_search(self):
        vocab = text_utils.VocabFromText(self.VOCAB_EXAMPLE_SENTENCES)
        model_config = self.config.model_config.butd
        model = TestDecoderModel(model_config, vocab)
        model.build()
        model.eval()

        expected_tokens = {
            1: [1.0, 23.0, 1.0, 24.0, 29.0, 37.0, 40.0, 17.0, 29.0, 2.0],
            2: [1.0, 0.0, 8.0, 1.0, 28.0, 25.0, 2.0],
            8: [1.0, 34.0, 1.0, 13.0, 1.0, 2.0],
            16: [1.0, 25.0, 18.0, 2.0],
        }

        for batch_size in [1, 2, 8, 16]:
            samples = []
            for _ in range(batch_size):
                sample = Sample()
                sample.dataset_name = "coco"
                sample.dataset_type = "test"
                sample.image_feature_0 = torch.randn(100, 2048)
                sample.answers = torch.zeros((5, 10), dtype=torch.long)
                samples.append(sample)

            sample_list = SampleList(samples)
            tokens = model(sample_list)["captions"]
            self.assertEqual(np.trim_zeros(tokens[0].tolist()),
                             expected_tokens[batch_size])
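np.trim_zeros strips the zero padding the decoder appends after the end-of-sequence token, so only the generated caption is compared. A quick illustration with a made-up padded caption:

import numpy as np

padded = np.array([1.0, 25.0, 18.0, 2.0, 0.0, 0.0])
assert np.trim_zeros(padded).tolist() == [1.0, 25.0, 18.0, 2.0]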
Example #4
    def classify(self, image: ImageType, text: str):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

            >>> from VisualBERT.mmf.models.mmbt import MMBT
            >>> model = MMBT.from_pretrained("mmbt.hateful_memes.images")
            >>> model.classify("some_url", "some_text")
            {"label": 0, "confidence": 0.56}

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image

        Returns:
            dict: {"label": 1 (hateful) or 0 (non-hateful),
                "confidence": probability of the predicted label}
        """
        if isinstance(image, str):
            if image.startswith("http"):
                temp_file = tempfile.NamedTemporaryFile()
                download(image,
                         *os.path.split(temp_file.name),
                         disable_tqdm=True)
                image = tv_helpers.default_loader(temp_file.name)
                temp_file.close()
            else:
                image = tv_helpers.default_loader(image)

        text = self.processor_dict["text_processor"]({"text": text})
        image = self.processor_dict["image_processor"](image)

        sample = Sample()
        sample.text = text["text"]
        if "input_ids" in text:
            sample.update(text)

        sample.image = image
        sample_list = SampleList([sample])
        device = next(self.model.parameters()).device
        sample_list = sample_list.to(device)

        output = self.model(sample_list)
        scores = nn.functional.softmax(output["scores"], dim=1)
        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}
    def test_finetune_model(self):
        self.finetune_model.eval()
        test_sample = Sample()
        test_sample.input_ids = torch.randint(low=0, high=30255,
                                              size=(128, )).long()
        test_sample.input_mask = torch.ones(128).long()
        test_sample.segment_ids = torch.zeros(128).long()
        test_sample.image = torch.rand((3, 300, 300)).float()
        test_sample_list = SampleList([test_sample.copy()])

        with torch.no_grad():
            model_output = self.finetune_model.model(test_sample_list)

        test_sample_list = SampleList([test_sample])
        script_model = torch.jit.script(self.finetune_model.model)
        with torch.no_grad():
            script_output = script_model(test_sample_list)

        self.assertTrue(
            torch.equal(model_output["scores"], script_output["scores"]))
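The test scripts the finetuned model with torch.jit.script and requires bitwise-equal scores from the eager and scripted runs; a fresh SampleList is built for each run, presumably because the model may mutate its input in place.

Example #6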
    def prepare_batch(self, batch):
        """
        Can be possibly overridden in your child class

        Prepare batch for passing to model. Whatever returned from here will
        be directly passed to model's forward function. Currently moves the batch to
        proper device.

        Args:
            batch (SampleList): sample list containing the currently loaded batch

        Returns:
            sample_list (SampleList): Returns a sample representing current
                batch loaded
        """
        # Should be a SampleList
        if not isinstance(batch, SampleList):
            # Try converting to SampleList
            batch = SampleList(batch)
        batch = batch.to(self._device)
        return batch
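A minimal sketch of what prepare_batch does, using the same Sample/SampleList imports that appear elsewhere on this page:

import torch
from VisualBERT.mmf.common.sample import Sample, SampleList

sample = Sample()
sample.x = torch.ones(4)
batch = [sample]                       # raw batch from a dataloader
if not isinstance(batch, SampleList):  # wrap if needed, as above
    batch = SampleList(batch)
batch = batch.to(torch.device("cpu"))  # stands in for self._device

Example #7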
    def test_forward(self):
        model_config = self.config.model_config.cnn_lstm

        cnn_lstm = CNNLSTM(model_config)
        cnn_lstm.build()
        cnn_lstm.init_losses()

        self.assertTrue(isinstance(cnn_lstm, torch.nn.Module))

        test_sample = Sample()
        test_sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
        test_sample.image = torch.randn(3, 320, 480)
        test_sample.targets = torch.randn(32)

        test_sample_list = SampleList([test_sample])
        test_sample_list.dataset_type = "train"
        test_sample_list.dataset_name = "clevr"
        output = cnn_lstm(test_sample_list)

        scores = output["scores"]
        loss = output["losses"]["train/clevr/logit_bce"]

        np.testing.assert_almost_equal(loss.item(), 19.2635, decimal=4)
        self.assertEqual(scores.size(), torch.Size((1, 32)))
Example #8
    def test_argmax_prediction_processor(self):
        processor = ArgMaxPredictionProcessor(config={})
        batch = SampleList(
            {"id": torch.tensor([1, 2, 3, 4, 5], dtype=torch.long)})
        model_output = {"scores": torch.rand(5, 4)}
        report = Report(batch, model_output)

        predictions = processor(report)

        expected_answers = [1, 1, 2, 1, 3]
        expected = []
        for idx, answer in enumerate(expected_answers):
            expected.append({"id": idx + 1, "answer": answer})

        self.assertEqual(predictions, expected)
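The processor maps each row of scores to its argmax and pairs it with the report's ids. A minimal re-implementation sketch of that behavior (not the actual ArgMaxPredictionProcessor code):

import torch

def argmax_predictions(ids, scores):
    answers = torch.argmax(scores, dim=1)
    return [{"id": i.item(), "answer": a.item()}
            for i, a in zip(ids, answers)]

ids = torch.tensor([1, 2, 3], dtype=torch.long)
scores = torch.tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
assert argmax_predictions(ids, scores) == [
    {"id": 1, "answer": 1},
    {"id": 2, "answer": 0},
    {"id": 3, "answer": 1},
]

Example #9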
def build_random_sample_list():
    from VisualBERT.mmf.common.sample import Sample, SampleList

    first = Sample()
    first.x = random.randint(0, 100)
    first.y = torch.rand((5, 4))
    first.z = Sample()
    first.z.x = random.randint(0, 100)
    first.z.y = torch.rand((6, 4))

    second = Sample()
    second.x = random.randint(0, 100)
    second.y = torch.rand((5, 4))
    second.z = Sample()
    second.z.x = random.randint(0, 100)
    second.z.y = torch.rand((6, 4))

    return SampleList([first, second])
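A hedged usage note: assuming SampleList stacks same-shaped tensor fields along a new batch dimension (as the other examples on this page rely on), the two samples built above collate so that y becomes a (2, 5, 4) tensor:

sample_list = build_random_sample_list()
print(sample_list.y.shape)  # expected: torch.Size([2, 5, 4])

Example #10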
def compare_torchscript_transformer_models(model, vocab_size):
    test_sample = Sample()
    test_sample.input_ids = torch.randint(low=0, high=vocab_size,
                                          size=(128, )).long()
    test_sample.input_mask = torch.ones(128).long()
    test_sample.segment_ids = torch.zeros(128).long()
    test_sample.image_feature_0 = torch.rand((1, 100, 2048)).float()
    test_sample.image = torch.rand((3, 300, 300)).float()
    test_sample_list = SampleList([test_sample])

    with torch.no_grad():
        model_output = model(test_sample_list)

    script_model = torch.jit.script(model)
    with torch.no_grad():
        script_output = script_model(test_sample_list)

    return torch.equal(model_output["scores"], script_output["scores"])
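Design note: the helper returns a bool instead of asserting, so tests for different TorchScript-able transformer models can reuse it and wrap the call in their own assertTrue.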
Example #11
    def _load_regions(self, idx, object_map, relationship_map):
        if self._return_scene_graph is None:
            return None, None

        image_info = self._get_image_info(idx)
        image_height = image_info["height"]
        image_width = image_info["width"]
        region_map = {}
        regions = []

        for region in image_info["regions"]:
            for synset in region["synsets"]:
                synset["entity_name"] = self.name_processor(
                    {"tokens": [synset["entity_name"]]})["text"]
                synset["synset_name"] = self.synset_processor(
                    {"tokens": [synset["synset_name"]]})["text"]

            region["height"] /= image_height
            region["width"] /= image_width
            region["y"] /= image_height
            region["x"] /= image_width

            relationships = []
            objects = []

            for relationship_idx in region["relationships"]:
                relationships.append(relationship_map[relationship_idx])

            for object_idx in region["objects"]:
                objects.append(object_map[object_idx])

            region["relationships"] = relationships
            region["objects"] = objects
            region["phrase"] = self.text_processor({"text":
                                                    region["phrase"]})["text"]

            region = Sample(region)
            region_map[region["region_id"]] = region
            regions.append(region)

        regions = SampleList(regions)
        return regions, region_map
Example #12
    def __call__(self, batch):
        # Create and return sample list with proper name
        # and type set if it is already not a sample list
        # (case of batched iterators)
        sample_list = batch
        if (
            # Check if batch is a list before checking batch[0]
            # or len as sometimes batch is already SampleList
            isinstance(batch, list)
            and len(batch) == 1
            and isinstance(batch[0], SampleList)
        ):
            sample_list = batch[0]
        elif not isinstance(batch, SampleList):
            sample_list = SampleList(batch)

        if sample_list._get_tensor_field() is None:
            sample_list = SampleList(sample_list.to_dict())

        sample_list.dataset_name = self._dataset_name
        sample_list.dataset_type = self._dataset_type
        return sample_list
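A minimal sketch of the unwrapping this collator does first: a batched iterator may hand it a one-element list that already wraps a SampleList.

import torch
from VisualBERT.mmf.common.sample import Sample, SampleList

sample = Sample()
sample.x = torch.ones(3)
inner = SampleList([sample])
batch = [inner]
if (isinstance(batch, list) and len(batch) == 1
        and isinstance(batch[0], SampleList)):
    batch = batch[0]
assert batch is inner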
Example #13
    def test_nucleus_sampling(self):
        vocab = text_utils.VocabFromText(self.VOCAB_EXAMPLE_SENTENCES)

        model_config = self.config.model_config.butd
        model = TestDecoderModel(model_config, vocab)
        model.build()
        model.eval()

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = torch.randn(100, 2048)
        sample.answers = torch.zeros((5, 10), dtype=torch.long)
        sample_list = SampleList([sample])

        tokens = model(sample_list)["captions"]

        # These are the expected tokens for sum_threshold = 0.5.

        # Because of a bug fix in https://github.com/pytorch/pytorch/pull/47386,
        # torch.Tensor.multinomial generates a different random sequence.
        # TODO: Remove this hack after OSS uses a later version of PyTorch.
        if LegacyVersion(torch.__version__) > LegacyVersion("1.7.1"):
            expected_tokens = [1.0, 23.0, 38.0, 30.0, 5.0, 11.0, 2.0]
        else:
            expected_tokens = [
                1.0,
                29.0,
                11.0,
                11.0,
                39.0,
                10.0,
                31.0,
                4.0,
                19.0,
                39.0,
                2.0,
            ]

        self.assertEqual(tokens[0].tolist(), expected_tokens)
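For context, a minimal sketch of the nucleus (top-p) sampling rule applied with sum_threshold = 0.5; this is a generic illustration, not the BUTD decoder's code:

import torch

def nucleus_sample(logits, sum_threshold=0.5):
    # Keep the smallest prefix of tokens (by descending probability)
    # whose cumulative mass reaches sum_threshold, then sample from
    # the renormalized distribution. The top-1 token is always kept.
    probs = torch.softmax(logits, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True)
    exclusive_cumsum = torch.cumsum(sorted_probs, dim=-1) - sorted_probs
    kept = sorted_probs * (exclusive_cumsum < sum_threshold)
    choice = torch.multinomial(kept / kept.sum(), num_samples=1)
    return sorted_idx[choice]

torch.manual_seed(0)
token = nucleus_sample(torch.randn(41))  # 41 = toy vocab size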
Example #14
    def __call__(self, item):
        texts = item["text"]
        if not isinstance(texts, list):
            texts = [texts]

        processed = []
        for idx, text in enumerate(texts):
            sample = Sample()
            processed_text = self.tokenizer({"text": text})
            sample.update(processed_text)
            sample.segment_ids.fill_(idx)
            processed.append(sample)
        # Use SampleList to convert list of tensors to stacked tensors
        processed = SampleList(processed)
        if self.fusion_strategy == "concat":
            processed.input_ids = processed.input_ids.view(-1)
            processed.input_mask = processed.input_mask.view(-1)
            processed.segment_ids = processed.segment_ids.view(-1)
            processed.lm_label_ids = processed.lm_label_ids.view(-1)
        return processed.to_dict()
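A sketch of the "concat" branch above: after SampleList stacking, each field has shape (num_texts, seq_len), and view(-1) flattens the texts into one long sequence:

import torch

input_ids = torch.arange(6).view(2, 3)  # two texts, toy seq_len = 3
assert input_ids.view(-1).tolist() == [0, 1, 2, 3, 4, 5]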
Example #15
    def _load_relationships(self, idx, object_map):
        if self._return_relationships is None and self._return_scene_graph is None:
            return None, None

        image_info = self._get_image_info(idx)
        relationship_map = {}
        relationships = []

        for relationship in image_info["relationships"]:
            relationship["synsets"] = self.synset_processor(
                {"tokens": relationship["synsets"]})["text"]
            relationship["predicate"] = self.predicate_processor(
                {"tokens": relationship["predicate"]})["text"]
            relationship["object"] = object_map[relationship["object_id"]]
            relationship["subject"] = object_map[relationship["subject_id"]]

            relationship = Sample(relationship)
            relationship_map[relationship["relationship_id"]] = relationship
            relationships.append(relationship)

        relationships = SampleList(relationships)
        return relationships, relationship_map
Example #16
    def test_modal_end_token(self):
        self.finetune_model.eval()

        # Suppose 0 for <cls>, 1 for <pad> 2 for <sep>
        CLS = 0
        PAD = 1
        SEP = 2
        size = 128

        input_ids = torch.randint(low=0, high=30255, size=(size, )).long()
        input_mask = torch.ones(size).long()

        input_ids[0] = CLS
        length = torch.randint(low=2, high=size - 1, size=(1, ))
        input_ids[length] = SEP
        input_ids[length + 1:] = PAD
        input_mask[length + 1:] = 0

        test_sample = Sample()
        test_sample.input_ids = input_ids.clone()
        test_sample.input_mask = input_mask.clone()
        test_sample.segment_ids = torch.zeros(size).long()
        test_sample.image = torch.rand((3, 300, 300)).float()
        test_sample_list = SampleList([test_sample])

        mmbt_base = self.finetune_model.model.bert
        with torch.no_grad():
            actual_modal_end_token = mmbt_base.extract_modal_end_token(
                test_sample_list)

        expected_modal_end_token = torch.zeros([1]).fill_(SEP).long()
        self.assertTrue(
            torch.equal(actual_modal_end_token, expected_modal_end_token))
        self.assertTrue(
            torch.equal(test_sample_list.input_ids[0, :-1], input_ids[1:]))
        self.assertTrue(
            torch.equal(test_sample_list.input_mask[0, :-1], input_mask[1:]))
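A minimal sketch consistent with what the assertions check (not the actual MMBT code): the modal end token is the last non-padded token (<sep>), found via the input mask:

import torch

input_ids = torch.tensor([[0, 7, 8, 2, 1]])   # toy: 0=<cls>, 2=<sep>, 1=<pad>
input_mask = torch.tensor([[1, 1, 1, 1, 0]])
gather_index = input_mask.sum(1, keepdim=True) - 1     # position of <sep>
modal_end_token = torch.gather(input_ids, 1, gather_index).squeeze(1)
assert modal_end_token.item() == 2

Example #17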
    def prepare_batch(self, batch, *args, **kwargs):
        batch = SampleList(batch)
        return self.mapping[batch.dataset_type].prepare_batch(batch)
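Unlike the trainer-level prepare_batch above, this variant wraps the batch in a SampleList and then delegates to a per-dataset-type trainer looked up in self.mapping.

Example #18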
    def test_pretrained_model(self):
        sample_list = SampleList()

        sample_list.add_field(
            "input_ids",
            torch.randint(low=0, high=BERT_VOCAB_SIZE, size=(1, 128)).long(),
        )
        sample_list.add_field("input_mask", torch.ones((1, 128)).long())
        sample_list.add_field("segment_ids", torch.zeros(1, 128).long())
        sample_list.add_field("image_feature_0",
                              torch.rand((1, 100, 2048)).float())
        sample_list.add_field(
            "lm_label_ids",
            torch.zeros((1, 128), dtype=torch.long).fill_(-1))

        self.pretrain_model.eval()

        sample_list.dataset_name = "random"
        sample_list.dataset_type = "test"
        with torch.no_grad():
            model_output = self.pretrain_model(sample_list)

        self.assertTrue("losses" in model_output)
        self.assertTrue("random/test/masked_lm_loss" in model_output["losses"])
        self.assertTrue(
            model_output["losses"]["random/test/masked_lm_loss"] == 0)