Example #1
    def prepare_batch(self, batch):
        """
        Can possibly be overridden in your child class.

        Prepare the batch for passing to the model. Whatever is returned from
        here will be passed directly to the model's forward function.

        Parameters
        ----------
        batch: dict
            Dictionary containing information about the next
            sample in batched form

        Returns
        -------
        batch: SampleList
            The batch converted to a SampleList and moved to the proper
            device. Typical fields include:
            'texts': the main text of the batch, which is a question in
            most cases
            'image_features': image features for the current batch
            'image_dim': max bounding boxes for the images
            'contexts': context relevant to the current batch; in VisDial
            this is the history of the dialog so far
        """
        # Should be a SampleList
        if not isinstance(batch, SampleList):
            # Try converting to SampleList
            batch = SampleList(batch)
        batch = batch.to(self._device)
        return batch
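A minimal sketch of where this hook usually sits, assuming a trainer object with a model attribute and an iterable dataloader (both hypothetical names, not taken from the snippet above):

    # Hypothetical call site for prepare_batch; trainer and dataloader are assumptions
    for batch in dataloader:
        prepared = trainer.prepare_batch(batch)  # SampleList on the right device
        output = trainer.model(prepared)         # goes straight to the model's forward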
Example #2
    def predict(self, url, feat_name, get_features=False):
        with torch.no_grad():
            detectron_features = get_detectron_features([url],
                                                        self.detection_model,
                                                        False, feat_name,
                                                        self.cuda_device)
            # returns a single-element list
            detectron_features = detectron_features[0]

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to(self.cuda_device)

            tokens = self.caption_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        if not get_features:
            return tokens
        else:
            return tokens, detectron_features
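Calling this captioning helper might look like the sketch below; the demo instance, URL, and feature name are placeholders rather than values from the snippet:

    # Hypothetical usage of the captioning predict above
    url = "http://example.com/image.jpg"
    tokens = demo.predict(url, feat_name="fc6")
    # with get_features=True the detectron features are returned as well
    tokens, feats = demo.predict(url, feat_name="fc6", get_features=True)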
Example #3
    def forward(self, images, image_scales, transitions=None):
        feature_list = self.encoder(images, image_scales)
        image_features = feature_list[0]
        assert len(feature_list) == 1, \
            'current model only supports batch size 1'

        sample = Sample()
        sample.dataset_name = "coco"
        sample.dataset_type = "test"
        sample.image_feature_0 = image_features
        # `answers` seems to work as a placeholder here,
        # so it does not matter what its size is
        sample.answers = torch.zeros((1, 10), dtype=torch.long)
        sample_list = SampleList([sample])
        sample_list = sample_list.to(device)
        if transitions is not None:
            sample_list.transitions = transitions

        output = self.decoder(sample_list)
        tokens = output['captions']
        caption = tokens.tolist()[0]
        caption = self.decoder.caption_processor(caption)['caption']

        return caption
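Assuming the class above is a torch.nn.Module (so the instance is callable), usage might look like this; captioner, images, and image_scales are placeholder names produced by whatever preprocessing the surrounding project uses:

    # Hypothetical usage; batch size must be 1 per the assert above
    caption = captioner(images, image_scales)
    # transitions, if given, is simply attached to the SampleList before decoding
    forced = captioner(images, image_scales, transitions=transitions)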
Example #4
    def predict(self, img_paths, qud):
        """
        We enable batch prediction here
        :return:
        """
        with torch.no_grad():
            detectron_features = self.get_detectron_features(
                img_paths)  # a list of image features
            resnet_features = self.get_resnet_features(
                img_paths)  # [batch_size, 196, 2048]

            sample_list = []
            for i in range(len(detectron_features)):
                sample = Sample()
                processed_text = self.vqa_demo.text_processor({"text": qud})
                sample.text = processed_text["text"]
                sample.text_len = len(processed_text["tokens"])

                sample.image_feature_0 = detectron_features[i]
                sample.image_info_0 = Sample(
                    {"max_features": torch.tensor(100, dtype=torch.long)})
                sample.image_feature_1 = resnet_features[i]
                sample_list.append(sample)

            sample_list = SampleList(sample_list)
            sample_list = sample_list.to("cuda")

            scores = self.vqa_demo.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            batch_probs = []
            batch_answers = []

            for i in range(scores.shape[0]):
                top_indices = indices[i]
                top_scores = actual[i]

                probs = []
                answers = []

                for idx, score in enumerate(top_scores):
                    probs.append(score.item())
                    answers.append(
                        self.vqa_demo.answer_processor.idx2word(
                            top_indices[idx].item()))
                batch_probs.append(probs)
                batch_answers.append(answers)

        # if memory becomes an issue, clear it here
        # gc.collect()
        # torch.cuda.empty_cache()

        # each returned list has batch_size entries, one top-5 list per image:
        # [[ans_1, ..., ans_5], [ans_1, ..., ans_5]]
        return batch_probs, batch_answers
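A usage sketch for the batched variant, pairing each image's top-5 answers with their probabilities (the instance and path names are placeholders):

    # Hypothetical usage of the batched predict above
    img_paths = ["kitchen.jpg", "street.jpg"]
    probs, answers = demo.predict(img_paths, "what is in the picture?")
    for path, p, a in zip(img_paths, probs, answers):
        print(path, list(zip(a, p)))  # top-5 (answer, probability) pairs per image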
Example #5
    def predict(self, url):
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)

            sample = Sample()
            sample.dataset_name = "coco"
            sample.dataset_type = "test"
            sample.image_feature_0 = detectron_features
            sample.answers = torch.zeros((5, 10), dtype=torch.long)

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            tokens = self.pythia_model(sample_list)["captions"]

        gc.collect()
        torch.cuda.empty_cache()

        return tokens
Example #6
    def prepare_batch(self, batch):
        """
        Can possibly be overridden in your child class.

        Prepare the batch for passing to the model. Whatever is returned from
        here will be passed directly to the model's forward function.
        Currently moves the batch to the proper device.

        Args:
            batch (SampleList): sample list containing the currently loaded batch

        Returns:
            sample_list (SampleList): the current batch as a SampleList, moved
                to the proper device
        """
        # Should be a SampleList
        if not isinstance(batch, SampleList):
            # Try converting to SampleList
            batch = SampleList(batch)
        batch = batch.to(self._device)
        return batch
Example #7
    def predict(self, url, question):
        with torch.no_grad():
            detectron_features = self.get_detectron_features(url)
            resnet_features = self.get_resnet_features(url)

            sample = Sample()

            processed_text = self.text_processor({"text": question})
            sample.text = processed_text["text"]
            sample.text_len = len(processed_text["tokens"])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample({
                "max_features": torch.tensor(100, dtype=torch.long)
            })

            sample.image_feature_1 = resnet_features

            sample_list = SampleList([sample])
            sample_list = sample_list.to("cuda")

            scores = self.pythia_model(sample_list)["scores"]
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            probs = []
            answers = []

            for idx, score in enumerate(top_scores):
                probs.append(score.item())
                answers.append(
                    self.answer_processor.idx2word(top_indices[idx].item())
                )

        gc.collect()
        torch.cuda.empty_cache()
        return probs, answers
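The single-image variant returns two parallel top-5 lists; a usage sketch, with the instance name, URL, and question as placeholders:

    # Hypothetical usage of the single-image VQA predict above
    probs, answers = demo.predict("http://example.com/cat.jpg", "what animal is this?")
    for p, a in zip(probs, answers):
        print(f"{a}: {p:.3f}")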
Example #8
File: tiki.py  Project: psnonis/TikiAI
    def getAnswers(self, image, question, meta=None):

        first = time.time()
        meta = meta or str(image)
        image = Image.open(image).convert('RGB') if isinstance(image, str) else \
                image.convert('RGB')

        print(f'Tiki : Getting Answers : {meta}, {question}')

        with torch.no_grad():

            detectron_features = self.get_detectron_features(image)
            resnet152_features = self.get_resnet152_features(image)

            start = time.time()
            sample = Sample()

            processed_text = self.text_processor({'text': question})
            sample.text = processed_text['text']
            sample.text_len = len(processed_text['tokens'])

            sample.image_feature_0 = detectron_features
            sample.image_info_0 = Sample(
                {'max_features': torch.tensor(100, dtype=torch.long)})

            sample.image_feature_1 = resnet152_features

            sample_list = SampleList([sample])

            sample_list = sample_list.to(self.device.type)

            scores = self.pythiaVQA_model(sample_list)['scores']
            scores = torch.nn.functional.softmax(scores, dim=1)
            actual, indices = scores.topk(5, dim=1)

            top_indices = indices[0]
            top_scores = actual[0]

            answers = []

            for rank, score in enumerate(top_scores):
                answers.append({
                    'rank': rank,
                    'answer': self.answer_processor.idx2word(
                        top_indices[rank].item()),
                    'probability': score.item()
                })

            answer = answers[0]['answer']

            end = time.time()

        print(
            f'Tiki : Getting Answers : PythiaVQA - Finished in {end-start:7.3f} Seconds'
        )

        # `processing` is a timing dict defined elsewhere in this project
        processing['PythiaVQA'] = end - start

        gc.collect()

        torch.cuda.empty_cache()

        last = time.time()

        processing['InferTime'] = last - first

        return question, answer, answers
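getAnswers echoes the question back along with the single best answer and the full ranked list of dicts built above; consuming it might look like this, with the instance and image path as placeholders:

    # Hypothetical usage of getAnswers above
    question, best, ranked = tiki.getAnswers("kitchen.jpg", "what is on the counter?")
    for entry in ranked:
        print(entry['rank'], entry['answer'], f"{entry['probability']:.3f}")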