Example #1
    def test_resize_and_normalize_matches_generalized_rcnn_transform(self):
        loader = TorchImageLoader(resize=True, normalize=True, size_divisibility=32)
        transform = torchvision.models.detection.transform.GeneralizedRCNNTransform(
            loader.min_size, loader.max_size, loader.pixel_mean, loader.pixel_std
        )

        loaded_image, _ = loader([self.image_fixture_path])

        raw_image, _ = TorchImageLoader(resize=False, normalize=False)(self.image_fixture_path)
        transformed_raw_image, _ = transform([raw_image])

        assert loaded_image.shape == transformed_raw_image.tensors.shape
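
With `size_divisibility=32`, both the loader and torchvision's GeneralizedRCNNTransform pad batched images so height and width are multiples of 32, which is what makes the two shapes comparable. A minimal sketch of that padding rule (the helper name is hypothetical, not part of either library):

import math

import torch
import torch.nn.functional as F


def pad_to_divisible(image: torch.Tensor, size_divisibility: int = 32) -> torch.Tensor:
    # Pad a (C, H, W) image on the bottom/right so H and W become
    # multiples of ``size_divisibility``.
    _, height, width = image.shape
    new_height = math.ceil(height / size_divisibility) * size_divisibility
    new_width = math.ceil(width / size_divisibility) * size_divisibility
    # F.pad pads the last dimensions first: (left, right, top, bottom).
    return F.pad(image, (0, new_width - width, 0, new_height - height))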
Example #2
    def test_resize_and_normalize(self, device):
        loader = TorchImageLoader(resize=True, normalize=True, device=device)
        torch_device = torch.device(device)
        image, size = loader(self.image_fixture_path)
        assert image.device == torch_device
        assert size.device == torch_device
        assert image.shape[1] == 800
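
The `image.shape[1] == 800` assertion holds because resizing scales the shorter side of the image up to the loader's min_size, which is 800 by default in torchvision's detection transform. A rough sketch of that resize rule, assuming the default 800/1333 bounds:

def rcnn_resize_shape(height: int, width: int, min_size: int = 800, max_size: int = 1333):
    # Scale the shorter side up to ``min_size``, but never let the longer
    # side exceed ``max_size``.
    scale = min(min_size / min(height, width), max_size / max(height, width))
    return round(height * scale), round(width * scale)


rcnn_resize_shape(600, 800)  # -> (800, 1067)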
Example #3
    def test_forward_runs(self):
        loader = TorchImageLoader(device="cuda:0")
        backbone = ResnetBackbone().to("cuda:0")

        image_path = self.FIXTURES_ROOT / "data" / "images" / "COCO_train2014_000000458752.jpg"
        image_pixels, image_size = loader([image_path])
        result = backbone(image_pixels, image_size)
        assert tuple(result.keys()) == backbone.get_feature_names()
Example #4
    def test_forward_runs(self):
        loader = TorchImageLoader(resize=True, normalize=True, device="cuda:0")
        backbone = ResnetBackbone().to(device="cuda:0")
        backbone.eval()
        detector = FasterRcnnRegionDetector().to(device="cuda:0")
        detector.eval()

        image_path = self.FIXTURES_ROOT / "data" / "images" / "COCO_train2014_000000458752.jpg"

        images, sizes = loader([image_path, image_path])
        image_features = backbone(images, sizes)
        del backbone
        detections = detector(images, sizes, image_features)
        del detector

        assert len(detections.features) == 2
        assert len(detections.boxes) == 2
        assert len(detections.class_probs) == 2
        assert len(detections.class_labels) == 2

        assert detections.features[0].shape[0] >= 1
        assert detections.features[0].shape[1] == 1024
        assert (
            detections.features[0].shape[0]
            == detections.boxes[0].shape[0]
            == detections.class_probs[0].shape[0]
            == detections.class_labels[0].shape[0]
        )

        # Okay, cool, so far so good. Now let's make sure the output we got
        # actually matches exactly what we would get using the full pipeline
        # directly from torchvision.
        raw_loader = TorchImageLoader(resize=False,
                                      normalize=False,
                                      device="cuda:0")
        image, _ = raw_loader(image_path)
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=True).to("cuda:0")
        model.eval()
        result = model([image, image])
        # We can't compare the boxes directly because the boxes here are post-processed
        # back to reference the original un-resized image. But we can compare
        # the labels and scores. They should match exactly.
        assert (result[0]["labels"] == detections.class_labels[0]).all()
        assert (result[0]["scores"] == detections.class_probs[0]).all()
Example #5
    def test_train_read(self):
        self.reader = Flickr30kReader(
            image_dir=FIXTURES_ROOT / "vision" / "images" / "flickr30k",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            data_dir=FIXTURES_ROOT / "vision" / "flickr30k" / "sentences",
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
            featurize_captions=False,
            num_potential_hard_negatives=4,
        )

        instances = list(self.reader.read("test_fixtures/vision/flickr30k/test.txt"))
        assert len(instances) == 25

        instance = instances[5]
        assert len(instance.fields) == 5
        assert len(instance["caption"]) == 4
        assert len(instance["caption"][0]) == 12  # 16
        assert instance["caption"][0] != instance["caption"][1]
        assert instance["caption"][0] == instance["caption"][2]
        assert instance["caption"][0] == instance["caption"][3]
        question_tokens = [t.text for t in instance["caption"][0]]
        assert question_tokens == [
            "girl",
            "with",
            "brown",
            "hair",
            "sits",
            "on",
            "edge",
            "of",
            "concrete",
            "area",
            "overlooking",
            "water",
        ]

        batch = Batch(instances)
        batch.index_instances(Vocabulary())
        tensors = batch.as_tensor_dict()

        # (batch size, num images (3 hard negatives + gold image), num boxes (fake), num features (fake))
        assert tensors["box_features"].size() == (25, 4, 2, 10)

        # (batch size, num images (3 hard negatives + gold image), num boxes (fake), 4 coords)
        assert tensors["box_coordinates"].size() == (25, 4, 2, 4)

        # (batch size, num images (3 hard negatives + gold image), num boxes (fake),)
        assert tensors["box_mask"].size() == (25, 4, 2)

        # (batch size)
        assert tensors["label"].size() == (25,)
Example #6
    def setup_method(self):
        from allennlp_models.vision.dataset_readers.gqa import GQAReader

        super().setup_method()
        self.reader = GQAReader(
            image_dir=FIXTURES_ROOT / "vision" / "images" / "gqa",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
        )
Example #7
    def test_read(self):
        from allennlp_models.vision.dataset_readers.vqav2 import VQAv2Reader

        reader = VQAv2Reader(
            image_dir=FIXTURES_ROOT / "vision" / "images" / "vqav2",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
        )
        instances = list(reader.read("unittest"))
        assert len(instances) == 3

        instance = instances[0]
        assert len(instance.fields) == 6
        assert len(instance["question"]) == 7
        question_tokens = [t.text for t in instance["question"]]
        assert question_tokens == [
            "What", "is", "this", "photo", "taken", "looking", "through?"
        ]
        assert len(instance["labels"]) == 5
        labels = [field.label for field in instance["labels"].field_list]
        assert labels == ["net", "netting", "mesh", "pitcher", "orange"]
        assert torch.allclose(
            instance["label_weights"].tensor,
            torch.tensor([1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3, 1.0 / 3]),
        )

        batch = Batch(instances)
        batch.index_instances(Vocabulary())
        tensors = batch.as_tensor_dict()

        # (batch size, num boxes (fake), num features (fake))
        assert tensors["box_features"].size() == (3, 2, 10)

        # (batch size, num boxes (fake), 4 coords)
        assert tensors["box_coordinates"].size() == (3, 2, 4)

        # (batch size, num boxes (fake),)
        assert tensors["box_mask"].size() == (3, 2)

        # Nothing should be masked out since the number of fake boxes is the same
        # for each item in the batch.
        assert tensors["box_mask"].all()
Example #8
    def test_read(self):
        from allennlp_models.vision.dataset_readers.vgqa import VGQAReader

        reader = VGQAReader(
            image_dir=FIXTURES_ROOT / "vision" / "images" / "vgqa",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
        )
        instances = list(
            reader.read("test_fixtures/vision/vgqa/question_answers.json"))
        assert len(instances) == 8

        instance = instances[0]
        assert len(instance.fields) == 6
        assert len(instance["question"]) == 5
        question_tokens = [t.text for t in instance["question"]]
        assert question_tokens == ["What", "is", "on", "the", "curtains?"]
        assert len(instance["labels"]) == 1
        labels = [field.label for field in instance["labels"].field_list]
        assert labels == ["sailboats"]

        batch = Batch(instances)
        batch.index_instances(Vocabulary())
        tensors = batch.as_tensor_dict()

        # (batch size, num boxes (fake), num features (fake))
        assert tensors["box_features"].size() == (8, 2, 10)

        # (batch size, num boxes (fake), 4 coords)
        assert tensors["box_coordinates"].size() == (8, 2, 4)

        # (batch size, num boxes (fake))
        assert tensors["box_mask"].size() == (8, 2)

        # Nothing should be masked out since the number of fake boxes is the same
        # for each item in the batch.
        assert tensors["box_mask"].all()
Example #9
    def test_read(self):
        from allennlp_models.vision.dataset_readers.nlvr2 import Nlvr2Reader

        reader = Nlvr2Reader(
            image_dir=FIXTURES_ROOT / "vision" / "images" / "nlvr2",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
        )
        instances = list(
            reader.read("test_fixtures/vision/nlvr2/tiny-dev.json"))
        assert len(instances) == 8

        instance = instances[0]
        assert len(instance.fields) == 6
        assert instance["hypothesis"][0] == instance["hypothesis"][1]
        assert len(instance["hypothesis"][0]) == 18
        hypothesis_tokens = [t.text for t in instance["hypothesis"][0]]
        assert hypothesis_tokens[:6] == [
            "The", "right", "image", "shows", "a", "curving"
        ]
        assert instance["label"].label == 0
        assert instances[1]["label"].label == 1
        assert instance["identifier"].metadata == "dev-850-0-0"

        batch = Batch(instances)
        batch.index_instances(Vocabulary())
        tensors = batch.as_tensor_dict()

        # (batch size, 2 images per instance, num boxes (fake), num features (fake))
        assert tensors["box_features"].size() == (8, 2, 2, 10)

        # (batch size, 2 images per instance, num boxes (fake), 4 coords)
        assert tensors["box_coordinates"].size() == (8, 2, 2, 4)

        # (batch size, 2 images per instance, num boxes (fake))
        assert tensors["box_mask"].size() == (8, 2, 2)
Example #10
    def test_read(self):
        from allennlp_models.vision.dataset_readers.visual_entailment import VisualEntailmentReader

        reader = VisualEntailmentReader(
            image_dir=FIXTURES_ROOT / "vision" / "images" /
            "visual_entailment",
            image_loader=TorchImageLoader(),
            image_featurizer=Lazy(NullGridEmbedder),
            region_detector=Lazy(RandomRegionDetector),
            tokenizer=WhitespaceTokenizer(),
            token_indexers={"tokens": SingleIdTokenIndexer()},
        )
        instances = list(
            reader.read(
                "test_fixtures/vision/visual_entailment/sample_pairs.jsonl"))
        assert len(instances) == 16

        instance = instances[0]
        assert len(instance.fields) == 5
        assert len(instance["hypothesis"]) == 4
        sentence_tokens = [t.text for t in instance["hypothesis"]]
        assert sentence_tokens == ["A", "toddler", "sleeps", "outside."]
        assert instance["labels"].label == "contradiction"

        batch = Batch(instances)
        vocab = Vocabulary()
        vocab.add_tokens_to_namespace(
            ["entailment", "contradiction", "neutral"], "labels")
        batch.index_instances(vocab)
        tensors = batch.as_tensor_dict()

        # (batch size, num boxes (fake), num features (fake))
        assert tensors["box_features"].size() == (16, 2, 10)

        # (batch size, num boxes (fake), 4 coords)
        assert tensors["box_coordinates"].size() == (16, 2, 4)

        # (batch_size, num boxes (fake),)
        assert tensors["box_mask"].size() == (16, 2)
Example #11
    def test_basic_load(self, device, loader_params):
        loader = TorchImageLoader(resize=False, normalize=False, device=device, **loader_params)
        torch_device = torch.device(device)
        images, sizes = loader([self.TEST_DIR / "image1.jpg", self.TEST_DIR / "image2.jpg"])
        assert images.device == torch_device
        assert sizes.device == torch_device
        assert images.shape[0] == 2
        assert images.shape[1] == 3
        assert sizes.shape == (2, 2)
        assert list(sizes[0]) == [7, 15]
        assert list(sizes[1]) == [9, 12]
        if loader.size_divisibility <= 1:
            assert images.shape[2] == 9
            assert images.shape[3] == 15
        else:
            assert images.shape[2] >= 9
            assert images.shape[3] >= 15
            assert (images.shape[2] / loader.size_divisibility) % 1 == 0

        image, size = loader(self.TEST_DIR / "image1.jpg")
        assert image.device == torch_device
        assert size.device == torch_device
        assert len(image.shape) == 3
        assert list(size) == [7, 15]
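
The batched shape asserted above, 9 x 15 for images of size 7 x 15 and 9 x 12, follows from zero-padding each image to the per-batch maximum height and width before stacking. A minimal sketch of that batching step, assuming size_divisibility <= 1 so no extra rounding applies:

import torch


def batch_images(images: list) -> torch.Tensor:
    # Zero-pad a list of (C, H, W) tensors to a shared (max_H, max_W)
    # and stack them into one (N, C, max_H, max_W) batch.
    max_height = max(image.shape[1] for image in images)
    max_width = max(image.shape[2] for image in images)
    batch = images[0].new_zeros(len(images), images[0].shape[0], max_height, max_width)
    for i, image in enumerate(images):
        batch[i, :, : image.shape[1], : image.shape[2]] = image
    return batch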