Example #1
    def _init_processors(self):
        with open(os.path.join(BASE_VQA_DIR_PATH, "model_data/pythia.yaml")) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

        config = ConfigNode(config)
        # Remove warning
        config.training_parameters.evalai_inference = True
        registry.register("config", config)

        self.config = config

        vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
        text_processor_config = vqa_config.processors.text_processor
        answer_processor_config = vqa_config.processors.answer_processor

        text_processor_config.params.vocab.vocab_file = os.path.join(
            BASE_VQA_DIR_PATH, "model_data/vocabulary_100k.txt"
        )
        answer_processor_config.params.vocab_file = os.path.join(
            BASE_VQA_DIR_PATH, "model_data/answers_vqa.txt"
        )
        # Add a preprocessor, as it will be needed when we get questions from the user
        self.text_processor = VocabProcessor(text_processor_config.params)
        self.answer_processor = VQAAnswerProcessor(answer_processor_config.params)

        registry.register("vqa2_text_processor", self.text_processor)
        registry.register("vqa2_answer_processor", self.answer_processor)
        registry.register(
            "vqa2_num_final_outputs", self.answer_processor.get_vocab_size()
        )
Example #2
    def test_caption_processor(self):
        path = os.path.join(
            os.path.abspath(__file__),
            "../../../pythia/common/defaults/configs/tasks/captioning/coco.yml",
        )
        with open(os.path.abspath(path)) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

        config = ConfigNode(config)
        captioning_config = config.task_attributes.captioning.dataset_attributes.coco
        caption_processor_config = captioning_config.processors.caption_processor
        vocab_path = os.path.join(os.path.abspath(__file__), "../../modules/vocab.txt")
        caption_processor_config.params.vocab.vocab_file = os.path.abspath(vocab_path)
        caption_processor = CaptionProcessor(caption_processor_config.params)

        tokens = [1, 4, 5, 6, 4, 7, 8, 2, 0, 0, 0]
        caption = caption_processor(tokens)

        # Test start, stop, pad are removed
        self.assertNotIn('<s>', caption["tokens"])
        self.assertNotIn('</s>', caption["tokens"])
        self.assertNotIn('<pad>', caption["tokens"])

        # Test caption is correct
        self.assertEqual(caption["caption"], "a man with a red helmet")
Example #3
    def build_processors(self):

        print('Tiki : Initializing : Building - Text Processors')

        with open('/final/data/pythia.yaml') as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

        config = ConfigNode(config)
        config.training_parameters.evalai_inference = True  # Remove warning
        registry.register('config', config)

        self.config = config
        vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
        text_processor_config = vqa_config.processors.text_processor
        answer_processor_config = vqa_config.processors.answer_processor

        text_processor_config.params.vocab.vocab_file = '/final/data/vocabulary_100k.txt'
        answer_processor_config.params.vocab_file = '/final/data/answers_vqa.txt'

        self.text_processor = VocabProcessor(text_processor_config.params)
        self.answer_processor = VQAAnswerProcessor(
            answer_processor_config.params)

        registry.register('vqa2_text_processor', self.text_processor)
        registry.register('vqa2_answer_processor', self.answer_processor)
        registry.register('vqa2_num_final_outputs',
                          self.answer_processor.get_vocab_size())
Example #4
    def __init__(self, use_constrained=False):
        super(PythiaCaptioner, self).__init__()
        # load configuration file
        with open(config_file) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config = ConfigNode(config)

        self.use_constrained = use_constrained

        # the following lines set inference-related configuration
        # parameters and register the config with Pythia
        config.training_parameters.evalai_inference = True
        registry.register("config", config)
        self.config = config

        captioning_config = config.task_attributes.captioning.dataset_attributes.coco
        text_processor_config = captioning_config.processors.text_processor
        caption_processor_config = captioning_config.processors.caption_processor
        # text_processor and caption_processor are used to pre-process the text
        text_processor_config.params.vocab.vocab_file = vocab_file
        caption_processor_config.params.vocab.vocab_file = vocab_file
        self.text_processor = VocabProcessor(text_processor_config.params)
        self.caption_processor = CaptionProcessor(
            caption_processor_config.params)

        registry.register("coco_text_processor", self.text_processor)
        registry.register("coco_caption_processor", self.caption_processor)

        self.model = self._build_model()
Example #5
    def __init__(self,
                 max_pred,
                 mask_prob,
                 vocab_words,
                 indexer,
                 max_len=512,
                 block_mask=False,
                 truncate_config=None,
                 mask_image_regions=False,
                 mode="s2s",
                 len_vis_input=49,
                 vis_mask_prob=0.25,
                 region_bbox_prefix='',
                 region_bbox_file=None,
                 region_det_file_prefix='',
                 local_rank=-1,
                 load_vqa_ann=False,
                 id_digits=3):
        super().__init__()
        truncate_config = truncate_config or {}  # avoid a mutable default argument
        self.max_pred = max_pred  # max tokens of prediction
        self.mask_prob = mask_prob  # masking probability
        self.vocab_words = vocab_words  # vocabulary (sub)words
        self.indexer = indexer  # function from token to token index
        self.max_len = max_len
        self._tril_matrix = torch.tril(
            torch.ones((max_len, max_len), dtype=torch.long))
        self.always_truncate_tail = truncate_config.get(
            'always_truncate_tail', False)
        self.max_len_b = truncate_config.get('max_len_b', None)
        self.trunc_seg = truncate_config.get('trunc_seg', None)
        self.mask_image_regions = mask_image_regions
        assert mode in ("s2s", "bi")
        self.mode = mode
        self.region_bbox_prefix = region_bbox_prefix
        self.region_bbox_file = region_bbox_file
        self.region_det_file_prefix = region_det_file_prefix
        self.id_digits = id_digits

        self.len_vis_input = len_vis_input
        self.vis_mask_prob = vis_mask_prob
        self.task_idx = 0
        # for images
        if load_vqa_ann:
            # import packages from pythia
            import pythia.tasks.processors as pythia_proc  # VQAAnswerProcessor
            from pythia.utils.configuration import ConfigNode
            args = {
                'vocab_file':
                '/home/jupyter/VLP/pythia/data/vocabs/answers_vqa.txt',
                'num_answers': 10,
                'preprocessor': {
                    'type': 'simple_word',
                    'params': {}
                }
            }
            args = ConfigNode(args)
            self.ans_proc = pythia_proc.registry.get_processor_class(
                'vqa_answer')(args)
        else:
            self.ans_proc = None
Example #6
    def __init__(self, use_constrained=False):
        super(PythiaCaptioner, self).__init__()
        # load configuration file
        with open(config_file) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config = ConfigNode(config)

        self.use_constrained = use_constrained

        # enable inference mode (silences a training warning) and register
        # the config in Pythia's global registry
        config.training_parameters.evalai_inference = True
        registry.register("config", config)
        self.config = config

        captioning_config = config.task_attributes.captioning.dataset_attributes.coco
        text_processor_config = captioning_config.processors.text_processor
        caption_processor_config = captioning_config.processors.caption_processor

        text_processor_config.params.vocab.vocab_file = vocab_file
        caption_processor_config.params.vocab.vocab_file = vocab_file
        self.text_processor = VocabProcessor(text_processor_config.params)
        self.caption_processor = CaptionProcessor(
            caption_processor_config.params)

        registry.register("coco_text_processor", self.text_processor)
        registry.register("coco_caption_processor", self.caption_processor)

        self.model = self._build_model()
Example #7
  def _init_processors(self):
    with open(model_yaml) as f:
      config = yaml.load(f, Loader=yaml.FullLoader)

    config = ConfigNode(config)
    # Remove warning
    config.training_parameters.evalai_inference = True
    registry.register("config", config)

    self.config = config

    captioning_config = config.task_attributes.captioning.dataset_attributes.coco
    # captioning_config = config.task_attributes.captioning.dataset_attributes.youcookII
    text_processor_config = captioning_config.processors.text_processor
    caption_processor_config = captioning_config.processors.caption_processor
    # print("DEBUG captioning_config:", captioning_config)
    # print("DEBUG text_processor_config:", text_processor_config)
    # print("DEBUG caption_processor_config:", caption_processor_config)

    text_processor_config.params.vocab.vocab_file = "content/model_data/vocabulary_captioning_thresh5.txt"
    caption_processor_config.params.vocab.vocab_file = "content/model_data/vocabulary_captioning_thresh5.txt"
    self.text_processor = VocabProcessor(text_processor_config.params)
    self.caption_processor = CaptionProcessor(caption_processor_config.params)
    # print("DEBUG text_processor:", self.text_processor)
    # print("DEBUG caption_processor:", self.caption_processor)

    registry.register("coco_text_processor", self.text_processor)
    registry.register("coco_caption_processor", self.caption_processor)
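A minimal decoding sketch, assuming the conventions seen in Example #2: the caption processor takes a list of token ids (including start, stop, and pad tokens) and returns the detokenized caption. `token_ids` is an illustrative stand-in for model output.

    token_ids = [1, 4, 5, 6, 4, 7, 8, 2, 0, 0]   # illustrative ids incl. <s>, </s>, <pad>
    decoded = self.caption_processor(token_ids)  # dict with "tokens" and "caption"
    print(decoded["caption"])                    # e.g. "a man with a red helmet"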
Example #8
    def __init__(self, max_pred, mask_prob, vocab_words, indexer, max_len=512,
                 block_mask=False, new_segment_ids=False, truncate_config=None,
                 mask_image_regions=False, mode="s2s", len_vis_input=49,
                 vis_mask_prob=0.25, enable_butd=False, region_bbox_file='',
                 region_det_file_prefix='', local_rank=-1, load_vqa_ann=False):
        super().__init__()
        truncate_config = truncate_config or {}  # avoid a mutable default argument
        self.max_pred = max_pred  # max tokens of prediction
        self.mask_prob = mask_prob  # masking probability
        self.vocab_words = vocab_words  # vocabulary (sub)words
        self.indexer = indexer  # function from token to token index
        self.max_len = max_len
        self._tril_matrix = torch.tril(torch.ones(
            (max_len, max_len), dtype=torch.long))
        self.new_segment_ids = new_segment_ids
        self.always_truncate_tail = truncate_config.get(
            'always_truncate_tail', False)
        self.max_len_a = truncate_config.get('max_len_a', None)
        self.max_len_b = truncate_config.get('max_len_b', None)
        self.trunc_seg = truncate_config.get('trunc_seg', None)
        self.mask_image_regions = mask_image_regions
        assert mode in ("s2s", "l2r", "bi")
        self.mode = mode
        self.region_bbox_file = region_bbox_file
        self.region_det_file_prefix = region_det_file_prefix
        
        with open(self.region_bbox_file, 'rb') as region_bbox_f:
            self.bbox_dict = pickle.load(region_bbox_f, encoding="bytes")

        if mode == 's2s':
            self.task_idx = 3   # relax projection layer for different tasks
        elif mode == 'bi':
            self.task_idx = 0
        elif mode == 'l2r':
            self.task_idx = 1

        self.len_vis_input = len_vis_input
        self.vis_mask_prob = vis_mask_prob

        # for images
        self.enable_butd = enable_butd
        if not enable_butd:
            self.Resize = transforms.Resize((255, 255))
            self.RandomCrop = transforms.RandomCrop((224, 224))
            self.ToTensor = transforms.ToTensor()
            self.res_Normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        else:
            if load_vqa_ann:
                # import packages from pythia
                import pythia.tasks.processors as pythia_proc # VQAAnswerProcessor
                from pythia.utils.configuration import ConfigNode
                args = {
                    'vocab_file': 'pythia/data/vocabs/answers_vqa.txt',
                    'num_answers': 10,
                    'preprocessor': {'type': 'simple_word', 'params': {}}
                }
                args = ConfigNode(args)
                self.ans_proc = pythia_proc.registry.get_processor_class('vqa_answer')(args)
            else:
                self.ans_proc = None
Example #9
    def __init__(self):
        config_file = 'model_data/butd.yaml'
        vocab_file = 'model_data/vocabulary_captioning_thresh5.txt'

        with open(config_file) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config = ConfigNode(config)

        captioning_config = config.task_attributes.captioning.dataset_attributes.coco
        text_processor_config = captioning_config.processors.text_processor
        text_processor_config.params.vocab.vocab_file = vocab_file
        text_processor = VocabProcessor(text_processor_config.params)

        self.vocab = text_processor.vocab
Example #10
    def __init__(self, config, *args, **kwargs):
        if not hasattr(config, "vocab"):
            raise AttributeError(
                "Config passed to the processor has no attribute vocab")
        vocab_processor_config = ConfigNode(config)
        # GloVeProcessor needs vocab type to be "intersected"
        vocab_processor_config.vocab.type = "intersected"

        if "vocab_file" not in vocab_processor_config.vocab:
            warnings.warn("'vocab_file' key is not present in the config."
                          " Switching to pretrained vocab.")

            vocab_processor_config.vocab.type = "pretrained"

        super().__init__(vocab_processor_config, *args, **kwargs)
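For reference, a minimal sketch of a config this processor accepts, assuming the usual Pythia vocab parameters; the vocab file and embedding_name values below are illustrative, not from the source.

    config = ConfigNode({
        "vocab": {
            "type": "intersected",                # forced to "intersected" anyway
            "vocab_file": "vocabulary_100k.txt",  # illustrative path
            "embedding_name": "glove.6B.300d",    # illustrative GloVe embedding
        }
    })
    glove_processor = GloVeProcessor(config)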
Example #11
def build_caption_model(caption_config: Dict, cuda_device: torch.device):
    """

    Parameters
    ----------
    caption_config : Dict
        Dict of BUTD and Detectron model configuration.
    cuda_device : torch.device
        Torch device to load the model to.

    Returns
    -------
    (model, caption_processor, text_processor) : List[object]
        Returns the model, caption and text processor


    """
    with open(caption_config["butd_model"]["config_yaml"]) as f:
        butd_config = yaml.load(f, Loader=yaml.FullLoader)
    butd_config = ConfigNode(butd_config)
    butd_config.training_parameters.evalai_inference = True
    registry.register("config", butd_config)

    caption_processor, text_processor = init_processors(
        caption_config, butd_config)

    if cuda_device == torch.device('cpu'):
        state_dict = torch.load(caption_config["butd_model"]["model_pth"],
                                map_location='cpu')
    else:
        state_dict = torch.load(caption_config["butd_model"]["model_pth"])

    model_config = butd_config.model_attributes.butd
    model_config.model_data_dir = caption_config["model_data_dir"]
    model = BUTD(model_config)
    model.build()
    model.init_losses_and_metrics()

    if list(state_dict.keys())[0].startswith('module') and \
            not hasattr(model, 'module'):
        state_dict = multi_gpu_state_to_single(state_dict)

    model.load_state_dict(state_dict)
    model.to(cuda_device)
    model.eval()

    return model, caption_processor, text_processor
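A possible invocation, inferred from the keys build_caption_model reads; the paths are placeholders, not from the source.

    caption_config = {
        "butd_model": {
            "config_yaml": "model_data/butd.yaml",  # placeholder config path
            "model_pth": "model_data/butd.pth",     # placeholder checkpoint path
        },
        "model_data_dir": "model_data/",
    }
    model, caption_processor, text_processor = build_caption_model(
        caption_config, torch.device("cpu"))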
Example #12
    def _init_processors(self):
        with open("model_data/butd.yaml") as f:
            config = yaml.load(f)

            config = ConfigNode(config)
            config.training_parameters.evalai_inference = True
            registry.register("config", config)

            self.config = config

            captioning_config = config.task_attributes.captioning.dataset_attributes.coco
            text_processor_config = captioning_config.processors.text_processor
            caption_processor_config = captioning_config.processors.caption_processor

            text_processor_config.params.vocab.vocab_file = "model_data/vocabulary_captioning_thresh5.txt"
            caption_processor_config.params.vocab.vocab_file = "model_data/vocabulary_captioning_thresh5.txt"
            self.text_processor = VocabProcessor(text_processor_config.params)
            self.caption_processor = CaptionProcessor(caption_processor_config.params)

            registry.register("coco_text_processor", self.text_processor)
            registry.register("coco_caption_processor", self.caption_processor)
Example #13
    def _init_text_embeddings(self, attr="text"):
        if "embeddings" not in attr:
            attr += "_embeddings"

        text_embeddings = []
        text_embeddings_list_config = self.config[attr]

        embeddings_out_dim = 0

        for text_embedding in text_embeddings_list_config:
            embedding_type = text_embedding.type
            embedding_kwargs = ConfigNode(text_embedding.params)

            self._update_text_embedding_args(embedding_kwargs)

            embedding = TextEmbedding(embedding_type, **embedding_kwargs)

            text_embeddings.append(embedding)
            embeddings_out_dim += embedding.text_out_dim

        setattr(self, attr + "_out_dim", embeddings_out_dim)
        setattr(self, attr, nn.ModuleList(text_embeddings))
Example #14
    def test_caption_bleu4(self):
        path = os.path.join(
            os.path.abspath(__file__),
            "../../../pythia/common/defaults/configs/datasets/captioning/coco.yml",
        )
        with open(os.path.abspath(path)) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

        config = ConfigNode(config)
        captioning_config = config.dataset_attributes.coco
        caption_processor_config = captioning_config.processors.caption_processor
        vocab_path = os.path.join(os.path.abspath(__file__), "..", "..",
                                  "data", "vocab.txt")
        caption_processor_config.params.vocab.vocab_file = os.path.abspath(
            vocab_path)
        caption_processor = CaptionProcessor(caption_processor_config.params)
        registry.register("coco_caption_processor", caption_processor)

        caption_bleu4 = metrics.CaptionBleu4Metric()
        expected = Sample()
        predicted = dict()

        # Test complete match
        expected.answers = torch.empty((5, 5, 10))
        expected.answers.fill_(4)
        predicted["scores"] = torch.zeros((5, 10, 19))
        predicted["scores"][:, :, 4] = 1.0

        self.assertEqual(
            caption_bleu4.calculate(expected, predicted).item(), 1.0)

        # Test partial match
        expected.answers = torch.empty((5, 5, 10))
        expected.answers.fill_(4)
        predicted["scores"] = torch.zeros((5, 10, 19))
        predicted["scores"][:, 0:5, 4] = 1.0

        self.assertAlmostEqual(
            caption_bleu4.calculate(expected, predicted).item(), 0.3928, 4)
Example #15
    def _get_config(self, path):
        path = os.path.join(os.path.abspath(__file__), path)
        with open(os.path.abspath(path)) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        config = ConfigNode(config)
        return config
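Called, for instance, with a path relative to the test file (mirroring Example #14); the exact path is illustrative.

    config = self._get_config(
        "../../../pythia/common/defaults/configs/datasets/captioning/coco.yml")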
Example #16
#!/usr/bin/env python3

import yaml

from pythia.utils.configuration import ConfigNode
from pythia.tasks.processors import VocabProcessor, VQAAnswerProcessor

from torchvision import models

if __name__ == '__main__':

    resnet152 = models.resnet152(pretrained=True)

    with open('/final/data/pythia.yaml') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    config = ConfigNode(config)
    vqa_config = config.task_attributes.vqa.dataset_attributes.vqa2
    text_processor_config = vqa_config.processors.text_processor
    answer_processor_config = vqa_config.processors.answer_processor

    text_processor_config.params.vocab.vocab_file = '/final/data/vocabulary_100k.txt'
    answer_processor_config.params.vocab_file = '/final/data/answers_vqa.txt'

    text_processor = VocabProcessor(text_processor_config.params)
    answer_processor = VQAAnswerProcessor(answer_processor_config.params)