Example #1
    def forward(self, images, image_scales, transitions=None):
        feature_list = self.encoder(images, image_scales)
        image_features = feature_list[0]
        assert len(feature_list) == 1, \
            'current model only supports batch size 1'

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = image_features
        # answers seem to act only as a placeholder here,
        # so their size does not matter
        sample.answers = torch.zeros((1, 10), dtype=torch.long)
        sample_list = SampleList([sample])
        sample_list = sample_list.to(device)
        # set_trace()
        if transitions is not None:
            sample_list.transitions = transitions

        output = self.decoder(sample_list)
        tokens = output['captions']
        caption = tokens.tolist()[0]
        caption = self.decoder.caption_processor(caption)['caption']

        return caption
Example #2
    def predict(self, url, feat_name, get_features=False):
        with torch.no_grad():
            detectron_features = get_detectron_features([url],
                                                        self.detection_model,
                                                        False, feat_name,
                                                        self.cuda_device)
            # returns a single-element list
            detectron_features = detectron_features[0]

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to(self.cuda_device)

            tokens = self.caption_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        if not get_features:
            return tokens
        else:
            return tokens, detectron_features
Example #3
    def prepare_batch(self, batch):
        """
        Can be possibly overriden in your child class

        Prepare batch for passing to model. Whatever returned from here will
        be directly passed to model's forward function

        Parameters
        ----------
        batch: dict
            Dictionary containing information about the next
            sample in batched form

        Returns
        -------
        data: dict
            Contains variables in the following format
            'texts': The main text of the batch which can be a question in
            most of the cases
            'image_features': Image features for the current batch
            'image_dim': Max BBoxes for the images
            'contexts': Contains context relevant to current batch, in VisDial
            this will be the history of the dialog till now

        obs: tensor
            Tensor containing observations for the current batch
        """
        # Should be a SampleList
        if not isinstance(batch, SampleList):
            # Try converting to SampleList
            batch = SampleList(batch)
        batch = batch.to(self._device)
        return batch
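The docstring above describes the full contract of prepare_batch: wrap a raw batch in a SampleList if needed and move it to the trainer's device. Below is a minimal usage sketch; the DummyTrainer class, its fields, and the import path are illustrative assumptions, not part of the source.

import torch
from pythia.common.sample import Sample, SampleList  # import path assumed; adjust for your Pythia/MMF version


class DummyTrainer:
    """Hypothetical wrapper holding a prepare_batch like the one above."""

    def __init__(self, device="cpu"):
        self._device = device

    def prepare_batch(self, batch):
        # convert plain dicts / lists of Samples into a SampleList, then move to device
        if not isinstance(batch, SampleList):
            batch = SampleList(batch)
        return batch.to(self._device)


trainer = DummyTrainer()
raw_batch = [Sample({"text": torch.zeros(10, dtype=torch.long)})]
prepared = trainer.prepare_batch(raw_batch)  # a SampleList living on the trainer's device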
Example #4
    def predict(self, img_paths, qud):
        """
        We enable batch prediction here
        :return:
        """
        with torch.no_grad():
            detectron_features = self.get_detectron_features(
                img_paths)  # a list of image features
            resnet_features = self.get_resnet_features(
                img_paths)  # [batch_size, 196, 2048]

            sample_list = []
            for i in range(len(detectron_features)):
                sample = Sample()
                processed_text = self.vqa_demo.text_processor({"text": qud})
                sample.text = processed_text["text"]
                sample.text_len = len(processed_text["tokens"])

                sample.image_feature_0 = detectron_features[i]
                sample.image_info_0 = Sample(
                    {"max_features": torch.tensor(100, dtype=torch.long)})
                sample.image_feature_1 = resnet_features[i]
                sample_list.append(sample)

            sample_list = SampleList(sample_list)
            sample_list = sample_list.to("cuda")

            scores = self.vqa_demo.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            batch_probs = []
            batch_answers = []

            for i in range(scores.shape[0]):
                top_indices = indices[i]
                top_scores = actual[i]

                probs = []
                answers = []

                for idx, score in enumerate(top_scores):
                    probs.append(score.item())
                    answers.append(
                        self.vqa_demo.answer_processor.idx2word(
                            top_indices[idx].item()))
                batch_probs.append(probs)
                batch_answers.append(answers)

        # if memory becomes an issue, uncomment these to clear it
        # gc.collect()
        # torch.cuda.empty_cache()

        # both returned lists have batch_size entries,
        # e.g. [[ans_1, ans_2], [ans_1, ans_2]]
        return batch_probs, batch_answers
Example #5
    def __call__(self, batch):
        sample_list = SampleList(batch)
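        # for keys listed in _IDENTICAL_VALUE_KEYS the value is the same for every
        # sample in the batch: keep the batched copy under "<key>_" and collapse
        # "<key>" to its first (and only distinct) value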
        for key in self._IDENTICAL_VALUE_KEYS:
            sample_list[key + "_"] = sample_list[key]
            sample_list[key] = sample_list[key][0]

        return sample_list
Example #6
    def _load_objects(self, idx):
        image_info = self._get_image_info(idx)
        image_height = image_info["height"]
        image_width = image_info["width"]
        object_map = {}
        objects = []

        for obj in image_info["objects"]:
            obj["synsets"] = self.synset_processor({"tokens":
                                                    obj["synsets"]})["text"]
            obj["names"] = self.name_processor({"tokens":
                                                obj["names"]})["text"]
            obj["height"] = obj["h"] / image_height
            obj.pop("h")
            obj["width"] = obj["w"] / image_width
            obj.pop("w")
            obj["y"] /= image_height
            obj["x"] /= image_width
            obj["attributes"] = self.attribute_processor(
                {"tokens": obj["attributes"]})["text"]
            obj = Sample(obj)
            object_map[obj["object_id"]] = obj
            objects.append(obj)
        objects = SampleList(objects)

        return objects, object_map
Example #7
    def test_nucleus_sampling(self):
        vocab = text_utils.VocabFromText(self.VOCAB_EXAMPLE_SENTENCES)

        model_config = self.config.model_attributes.butd
        model = TestDecoderModel(model_config, vocab)
        model.build()
        model.to("cuda")
        model.eval()

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = torch.randn(100, 2048)
        sample.answers = torch.zeros((5, 10), dtype=torch.long)
        sample_list = SampleList([sample])

        tokens = model(sample_list)["captions"]

        # these are expected tokens for sum_threshold = 0.5
        expected_tokens = [
            1.0000e+00, 2.9140e+03, 5.9210e+03, 2.2040e+03, 5.0550e+03,
            9.2240e+03, 4.5120e+03, 1.8200e+02, 3.6490e+03, 6.4090e+03,
            2.0000e+00
        ]

        self.assertEqual(tokens[0].tolist(), expected_tokens)
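The expected tokens above come from nucleus (top-p) sampling with sum_threshold = 0.5. The following standalone sketch shows a standard top-p filtering step under that assumption; it is not the test model's actual decoder code.

import torch


def nucleus_sample(logits, sum_threshold=0.5):
    # nucleus (top-p) sampling: keep the smallest set of tokens whose
    # cumulative probability reaches sum_threshold, then sample from it
    probs = torch.softmax(logits, dim=-1)
    sorted_probs, sorted_idx = probs.sort(dim=-1, descending=True)
    cumulative = sorted_probs.cumsum(dim=-1)
    keep = (cumulative - sorted_probs) < sum_threshold  # tokens inside the nucleus
    keep[..., 0] = True  # always keep the single most probable token
    filtered = sorted_probs.masked_fill(~keep, 0.0)
    filtered = filtered / filtered.sum(dim=-1, keepdim=True)
    choice = torch.multinomial(filtered, num_samples=1)
    return sorted_idx.gather(-1, choice)


# usage: draw one token id from a fake 10-way distribution
token = nucleus_sample(torch.randn(1, 10))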
Example #8
    def test_forward(self):
        model_config = self.config.model_attributes.cnn_lstm

        cnn_lstm = CNNLSTM(model_config)
        cnn_lstm.build()
        cnn_lstm.init_losses_and_metrics()

        self.assertTrue(isinstance(cnn_lstm, torch.nn.Module))

        test_sample = Sample()
        test_sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
        test_sample.image = torch.randn(3, 320, 480)
        test_sample.targets = torch.randn(32)

        test_sample_list = SampleList([test_sample])
        test_sample_list.dataset_type = "train"
        test_sample_list.dataset_name = "clevr"
        output = cnn_lstm(test_sample_list)

        scores = output["scores"]
        loss = output["losses"]["train/clevr/logit_bce"]
        accuracy = output["metrics"]["train/clevr/accuracy"]

        np.testing.assert_almost_equal(loss.item(), 19.2635, decimal=4)
        np.testing.assert_almost_equal(accuracy.item(), 0)
        self.assertEqual(scores.size(), torch.Size((1, 32)))

        expected_scores = [
            -0.7598285675048828, -0.07029829174280167, -0.20382611453533173,
            -0.06990239024162292, 0.7965695858001709, 0.4730074405670166,
            -0.30569902062416077, 0.4244227707386017, 0.6511023044586182,
            0.2480515092611313, -0.5087617635726929, -0.7675772905349731,
            0.4361543357372284, 0.0018743239343166351, 0.6774630546569824,
            0.30618518590927124, -0.398895800113678, -0.13120117783546448,
            -0.4433199465274811, -0.25969570875167847, 0.6798790097236633,
            -0.34090861678123474, 0.0384102463722229, 0.2484571784734726,
            0.0456063412129879, -0.428459107875824, -0.026385333389043808,
            -0.1570669412612915, -0.2377825379371643, 0.3231588304042816,
            0.21098048985004425, -0.712349534034729
        ]

        np.testing.assert_almost_equal(scores[0].tolist(),
                                       expected_scores,
                                       decimal=5)
Example #9
    def predict(self, url):
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            tokens = self.pythia_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        return tokens
Example #10
    def prepare_batch(self, batch):
        """
        Can be possibly overriden in your child class

        Prepare batch for passing to model. Whatever returned from here will
        be directly passed to model's forward function. Currently moves the batch to
        proper device.

        Args:
            batch (SampleList): sample list containing the currently loaded batch

        Returns:
            sample_list (SampleList): Returns a sample representing current batch loaded
        """
        # Should be a SampleList
        if not isinstance(batch, SampleList):
            # Try converting to SampleList
            batch = SampleList(batch)
        batch = batch.to(self._device)
        return batch
Example #11
    def predict(self, url, question):
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)
            resnet_features = self.get_resnet_features(url)

            sample = Sample()

            processed_text = self.text_processor({"text": question})
            sample.text = processed_text["text"]
            sample.text_len = len(processed_text["tokens"])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample({
                "max_features": torch.tensor(100, dtype=torch.long)
            })

            sample.image_feature_1 = resnet_features

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            scores = self.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            probs = []
            answers = []

            for idx, score in enumerate(top_scores):
                probs.append(score.item())
                answers.append(
                    self.answer_processor.idx2word(top_indices[idx].item())
                )

        gc.collect()
        torch.cuda.empty_cache()
        return probs, answers
Example #12
    def test_forward(self):
        model_config = self.config.model_attributes.cnn_lstm

        cnn_lstm = CNNLSTM(model_config)
        cnn_lstm.build()
        cnn_lstm.init_losses_and_metrics()

        self.assertTrue(isinstance(cnn_lstm, torch.nn.Module))

        test_sample = Sample()
        test_sample.text = torch.randint(1, 79, (10, ), dtype=torch.long)
        test_sample.image = torch.randn(3, 320, 480)
        test_sample.targets = torch.randn(32)

        test_sample_list = SampleList([test_sample])
        test_sample_list.dataset_type = "train"
        test_sample_list.dataset_name = "clevr"
        output = cnn_lstm(test_sample_list)

        scores = output["scores"]
        loss = output["losses"]["train/logit_bce"]
        accuracy = output["metrics"]["train/accuracy"]

        np.testing.assert_almost_equal(loss.item(), 23.4751, decimal=4)
        np.testing.assert_almost_equal(accuracy.item(), 0)
        self.assertEqual(scores.size(), torch.Size((1, 32)))

        expected_scores = [
            2.2298e-02, -2.4975e-01, -1.1960e-01, -5.0868e-01, -9.3013e-02,
            1.3202e-02, -1.7536e-01, -3.1180e-01, 1.5369e-01, 1.4900e-01,
            1.9006e-01, -1.9457e-01, 1.4924e-02, -1.1032e-01, 1.3777e-01,
            -3.6255e-01, -2.9327e-01, 5.6247e-04, -4.8732e-01, 4.0949e-01,
            -1.1069e-01, 2.9696e-01, 4.1903e-02, 6.7062e-02, 7.0094e-01,
            -1.9898e-01, -2.9502e-03, -3.9040e-01, 1.2218e-01, 3.7895e-02,
            2.4472e-02, 1.7213e-01
        ]
        np.testing.assert_almost_equal(scores[0].tolist(),
                                       expected_scores,
                                       decimal=5)
Example #13
    def _load_regions(self, idx, object_map, relationship_map):
        if self._return_scene_graph is None:
            return None, None

        image_info = self._get_image_info(idx)
        image_height = image_info["height"]
        image_width = image_info["width"]
        region_map = {}
        regions = []

        for region in image_info["regions"]:
            for synset in region["synsets"]:
                synset["entity_name"] = self.name_processor(
                    {"tokens": [synset["entity_name"]]})["text"]
                synset["synset_name"] = self.synset_processor(
                    {"tokens": [synset["synset_name"]]})["text"]

            region["height"] /= image_height
            region["width"] /= image_width
            region["y"] /= image_height
            region["x"] /= image_width

            relationships = []
            objects = []

            for relationship_idx in region["relationships"]:
                relationships.append(relationship_map[relationship_idx])

            for object_idx in region["objects"]:
                objects.append(object_map[object_idx])

            region["relationships"] = relationships
            region["objects"] = objects
            region["phrase"] = self.text_processor({"text":
                                                    region["phrase"]})["text"]

            region = Sample(region)
            region_map[region["region_id"]] = region
            regions.append(region)

        regions = SampleList(regions)
        return regions, region_map
Example #14
    def _load_relationships(self, idx, object_map):
        if self._return_relationships is None and self._return_scene_graph is None:
            return None, None

        image_info = self._get_image_info(idx)
        relationship_map = {}
        relationships = []

        for relationship in image_info["relationships"]:
            relationship["synsets"] = self.synset_processor(
                {"tokens": relationship["synsets"]})["text"]
            relationship["predicate"] = self.predicate_processor(
                {"tokens": relationship["predicate"]})["text"]
            relationship["object"] = object_map[relationship["object_id"]]
            relationship["subject"] = object_map[relationship["subject_id"]]

            relationship = Sample(relationship)
            relationship_map[relationship["relationship_id"]] = relationship
            relationships.append(relationship)

        relationships = SampleList(relationships)
        return relationships, relationship_map
Example #15
File: tiki.py  Project: psnonis/TikiAI
    def getAnswers(self, image, question, meta=None):

        first = time.time()
        meta = meta or str(image)
        image = Image.open(image).convert('RGB') if isinstance(image, str) else \
                image.convert('RGB')

        print(f'Tiki : Getting Answers : {meta}, {question}')

        with torch.no_grad():

            detectron_features = self.get_detectron_features(image)
            resnet152_features = self.get_resnet152_features(image)

            start = time.time()
            sample = Sample()

            processed_text = self.text_processor({'text': question})
            sample.text = processed_text['text']
            sample.text_len = len(processed_text['tokens'])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample(
                {'max_features': torch.tensor(100, dtype=torch.long)})

            sample.image_feature_1 = resnet152_features

            sample_list = SampleList([sample])

            sample_list = sample_list.to(self.device.type)

            scores = self.pythiaVQA_model(sample_list)['scores']
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            answers = []

            for rank, score in enumerate(top_scores):
                answers.append({
                    'rank': rank,
                    'answer': self.answer_processor.idx2word(top_indices[rank].item()),
                    'probability': score.item(),
                })

            answer = answers[0]['answer']

            end = time.time()

        print(
            f'Tiki : Getting Answers : PythiaVQA - Finished in {end-start:7.3f} Seconds'
        )

        processing['PythiaVQA'] = end - start

        gc.collect()

        torch.cuda.empty_cache()

        last = time.time()

        processing['InferTime'] = last - first

        return question, answer, answers
Example #16
    def evaluate_full(self, loader, use_tqdm=False):
        
        meter = Meter()

        # metrics = ['vqamb_map', 'vqamb_f1'] # hardcode metrics for now
        metrics = ['accuracy']
        # metrics = ['vqamb_f1pt']

        print(len(loader))
        
        with torch.no_grad():
            self.model.eval()
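            # accumulate scores and targets over the whole loader so the metrics
            # below are computed on the full split in a single pass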
            tot_preds = []
            tot_targets = []
            tot_ids = []
            tot_att_pt = []
            tot_att_img = []
            tot_bbox_gt = []
            tot_bbox_pt = []
            tot_bbox_img = []
            tot_part = []
            # tot_qa_ids = []
            for batch in tqdm(loader, disable=not use_tqdm):
                report = self._forward_pass(batch)
                tot_preds.append(report.scores)
                tot_targets.append(report.targets)
                # tot_ids.extend(report.qa_id)
                # tot_att_pt.append(report.att)
                # tot_att_img.append(report.att_img)
                # tot_bbox_gt.append(report.gt_bbox)
                # tot_bbox_img.append(report.img_bbox)
                # tot_bbox_pt.append(report.pt_bbox)
                # tot_part.append(report.part)
                # tot_bbox_gt.append(report.gt_bbox)
                # tot_ptpath.append(report.ptpath)
                # tot_bbox_pt.append(report.bboxes)
                # tot_bbox_gt.append(report.gt_bbox)
                # tot_qa_ids.extend(report.qa_id)
                
            tot_preds = torch.cat(tot_preds, dim=0)
            tot_targets = torch.cat(tot_targets, dim=0)
            # tot_att_pt = torch.cat(tot_att_pt, dim=0)
            # tot_att_img = torch.cat(tot_att_img, dim=0)
            # tot_att_pt = torch.cat(tot_att_pt, dim=0)
            # tot_bbox_pt = torch.cat(tot_bbox_pt, dim=0)
            # tot_bbox_gt = torch.cat(tot_bbox_gt, dim=0)
            # tot_bbox_img = torch.cat(tot_bbox_img, dim=0)
            # Find bounding box with max attention
            
            # max_att_pt = tot_att_pt.argmax(dim=1)
            # max_bbox_pt = tot_bbox_pt[torch.arange(tot_bbox_pt.size(0)), max_att_pt]
            '''
            torch.save(tot_att_pt, 'tot_pt_att_objpartdev.pt')
            torch.save(tot_bbox_pt, 'tot_ptbboxes_objpartdev.pt')
            tot_part = sum(tot_part, [])
            torch.save(torch.Tensor(tot_part), 'tot_part_objpartdev.pt')
            '''
            # torch.save(tot_att_pt, 'tot_att_pt_localqafinal.pt')
            # torch.save(tot_att_img, 'tot_att_img_pythiaptfinal.pt')
            # torch.save(tot_bbox_pt, 'tot_bbox_pt_localqafinal.pt')
            # torch.save(tot_bbox_img, 'tot_bbox_img_pythia_ptfinal.pt')
            # torch.save(tot_bbox_gt, 'tot_bboxgt_localqafinal.pt')
            # torch.save(tot_preds, 'tot_preds_localqafinal.pt')
            # torch.save(tot_targets, 'tot_targets_localqafinal.pt')
            
            # torch.save(max_bbox_pt, 'max_pt_bbox_pythiaptfinal.pt')
            # torch.save(tot_bbox_gt, 'gt_bbox_pythiaptfinal.pt')
            
            # torch.save(tot_preds, 'tot_preds_localqa.pt')
            # torch.save(tot_targets, 'tot_targets_localqa.pt')
            # torch.save(tot_ptpath, 'tot_ptpath_vqambnew.pt')
            # torch.save(tot_att, 'tot_att_vqambnew.pt')
            # tot_qa_ids = torch.Tensor(tot_qa_ids)
            # torch.save(tot_qa_ids, 'tot_qa_ids.pt')

            model_output = {"scores": tot_preds}
            sample = Sample({"targets": tot_targets}) # "qa_index": tot_qa_index}) # "dataset_type": report.dataset_type, "dataset_name": report.dataset_name})
            sample_list = SampleList([sample])
            sample_list.add_field('dataset_type', report.dataset_type)
            sample_list.add_field('dataset_name', report.dataset_name)

            metric_fn = Metrics(metrics)
            full_met = metric_fn(sample_list, model_output)
            self.writer.write(full_met)

            if report.dataset_type == 'test':
                return
            
            meter.update(full_met)
            stop = self.early_stopping(self.current_iteration, meter)

            should_break = False
            if stop is True:
                self.writer.write("Early stopping activated")
                should_break = True
            
            self.model.train()

        return should_break