def build(self):
        """Create the text embedding, LSTM, CNN stack and classifier layers.

        The question vocabulary size and the number of answers are read from
        the registry using the first entry of ``self._datasets``.
        """
        assert len(self._datasets) > 0
        dataset_name = self._datasets[0]
        num_question_choices = registry.get(
            _TEMPLATES["question_vocab_size"].format(dataset_name))
        num_answer_choices = registry.get(
            _TEMPLATES["number_of_answers"].format(dataset_name))

        embedding_dim = self.config.text_embedding.embedding_dim
        self.text_embedding = nn.Embedding(num_question_choices, embedding_dim)
        self.lstm = nn.LSTM(**self.config.lstm)

        # One ConvNet per configured layer, followed by a final Flatten
        cnn_config = self.config.cnn.layers
        cnn_modules = [
            ConvNet(
                cnn_config.input_dims[idx],
                cnn_config.output_dims[idx],
                kernel_size=cnn_config.kernel_sizes[idx],
            )
            for idx in range(len(cnn_config.input_dims))
        ]
        cnn_modules.append(Flatten())
        self.cnn = nn.Sequential(*cnn_modules)

        # The output dim is only known at build time, so work on a copy of
        # the classifier config rather than mutating the shared one
        classifier_config = deepcopy(self.config.classifier)
        classifier_config.params.out_dim = num_answer_choices
        self.classifier = ClassifierLayer(
            classifier_config.type, **classifier_config.params)
    def upgrade_state_dict(self, state_dict):
        """Return a new dict with the keys of ``state_dict`` renamed.

        Each key is passed through the model's ``format_state_key`` and its
        leading ``module.`` prefix is added or stripped so that it agrees
        with whether the current model is treated as (distributed) data
        parallel.

        Args:
            state_dict (dict): Mapping of parameter names to values.

        Returns:
            dict: The same values stored under the upgraded keys.
        """
        wrapped = registry.get("data_parallel") or registry.get(
            "distributed")
        wrapped = wrapped or isinstance(
            self.trainer.model,
            (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel),
        )
        trainer_model = self.trainer.model
        model = trainer_model.module if wrapped else trainer_model

        upgraded = {}
        for old_key, value in state_dict.items():
            new_key = model.format_state_key(old_key)
            has_module_prefix = old_key.startswith("module.")
            if has_module_prefix and not wrapped:
                # The ckpt came from a data parallel model but the current
                # one is not: drop only the first "module."
                new_key = new_key.replace("module.", "", 1)
            elif wrapped and not has_module_prefix:
                new_key = "module." + new_key

            # Report renamed keys, except when the only difference is the
            # data parallel `module` prefix
            renamed = new_key != old_key
            if renamed and "module." + new_key != old_key:
                logger.info(f"Will load key {new_key} from {old_key}")
            upgraded[new_key] = value
        return upgraded
Exemplo n.º 3
0
def setup_imports():
    """Import every trainer, dataset and model module found under the root.

    Importing the modules lets them register themselves with the registry.
    The work is done once; later calls return early when the registry
    already holds a truthy ``imports_setup``.
    """
    from VisualBERT.mmf.common.registry import registry

    # Bail out early if a previous call already did the imports
    if registry.get("imports_setup", no_warning=True):
        return

    root_folder = registry.get("mmf_root", no_warning=True)

    if root_folder is None:
        # The environment takes precedence over the folder above this file
        default_root = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "..")
        env_root = os.environ.get("MMF_PATH", os.environ.get("PYTHIA_PATH"))
        root_folder = default_root if env_root is None else env_root

        registry.register("pythia_path", root_folder)
        registry.register("mmf_path", root_folder)

    trainer_pattern = os.path.join(root_folder, "trainers", "**", "*.py")
    datasets_pattern = os.path.join(root_folder, "datasets", "**", "*.py")
    model_pattern = os.path.join(root_folder, "models", "**", "*.py")

    # NOTE(review): this module path has no "VisualBERT." prefix, unlike the
    # other imports in this function -- confirm that this is intended.
    importlib.import_module("mmf.common.meter")

    files = []
    for pattern in (datasets_pattern, model_pattern, trainer_pattern):
        files += glob.glob(pattern, recursive=True)

    for path in files:
        path = os.path.realpath(path)
        if not path.endswith(".py") or path.endswith("__init__.py"):
            continue

        parts = path.split(os.sep)
        # Module path starts right after the last "mmf" path component
        import_prefix_index = max(
            (idx + 1 for idx, part in enumerate(parts) if part == "mmf"),
            default=0,
        )
        file_name = parts[-1]
        module_name = file_name[:file_name.find(".py")]
        package_parts = ["VisualBERT", "mmf"] + parts[import_prefix_index:-1]
        importlib.import_module(".".join(package_parts + [module_name]))

    registry.register("imports_setup", True)
Exemplo n.º 4
0
def log_progress(info: Union[Dict, Any], log_format="simple"):
    """Log a progress dict (or any other object) through the caller's logger.

    Args:
        info (dict|any): If a mapping, it is logged as key/value pairs in the
            selected format. Anything else is logged directly, exactly once.

        log_format (str, optional): json|simple. Defaults to "simple". When
            left at "simple" and a config is present in the registry,
            ``config.training.log_format`` is used instead. Any other value
            logs ``str(info)``.
    """
    # `collections.Mapping` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block is self-contained.
    from collections.abc import Mapping

    caller, _ = _find_caller()
    logger = logging.getLogger(caller)

    if not isinstance(info, Mapping):
        # Non-mapping values have no `.items()`; log them as-is and stop
        # instead of falling through to the mapping-only formatting below.
        logger.info(info)
        return

    if log_format == "simple":
        config = registry.get("config")
        if config:
            log_format = config.training.log_format

    if log_format == "simple":
        output = ", ".join(f"{key}: {value}" for key, value in info.items())
    elif log_format == "json":
        output = json.dumps(info)
    else:
        output = str(info)

    logger.info(output)
Exemplo n.º 5
0
    def __init__(self):
        """Set up the ``caption_bleu4`` metric.

        Keeps a handle on NLTK's ``bleu_score`` module and fetches the
        ``coco_caption_processor`` entry from the registry.
        """
        # Imported lazily so nltk is only needed when this metric is built
        from nltk.translate import bleu_score

        self._bleu_score = bleu_score
        super().__init__("caption_bleu4")
        self.caption_processor = registry.get("coco_caption_processor")
        self.required_params = ["scores", "answers", "captions"]
    def __init__(self, optimizer, *args, **kwargs):
        """Initialise the scheduler with ``self.lr_lambda`` as its lambda.

        Args:
            optimizer: Optimizer forwarded to the parent constructor.
            *args: Extra positional arguments for the parent constructor.
            **kwargs: Extra keyword arguments for the parent constructor.
        """
        from VisualBERT.mmf.utils.general import lr_lambda_update

        # Both attributes are set before the parent constructor runs
        self._global_config = registry.get("config")
        self._lambda_func = lr_lambda_update

        super().__init__(optimizer, self.lr_lambda, *args, **kwargs)
    def __init__(self, multi_task_instance):
        """Collect datasets and settings, then create the report folder.

        Args:
            multi_task_instance: Object exposing ``dataset_type`` and
                ``get_datasets()``.
        """
        self.test_task = multi_task_instance
        self.task_type = multi_task_instance.dataset_type
        self.config = registry.get("config")
        self.report = []
        self.timer = Timer()

        training_config = self.config.training
        self.training_config = training_config
        self.num_workers = training_config.num_workers
        self.batch_size = training_config.batch_size
        self.report_folder_arg = get_mmf_env(key="report_dir")
        self.experiment_name = training_config.experiment_name

        self.datasets = list(self.test_task.get_datasets())

        # Index -1 selects the last dataset of the list
        self.current_dataset_idx = -1
        self.current_dataset = self.datasets[self.current_dataset_idx]

        self.save_dir = get_mmf_env(key="save_dir")
        run_folder = ckpt_name_from_core_args(self.config)
        run_folder = run_folder + foldername_from_config_override(self.config)
        self.report_folder = os.path.join(self.save_dir, run_folder, "reports")

        # An explicitly configured report dir wins over the derived one
        if self.report_folder_arg:
            self.report_folder = self.report_folder_arg

        PathManager.mkdirs(self.report_folder)
Exemplo n.º 8
0
 def _build_word_embedding(self):
     """Build the word embedding from the first dataset's text processor.

     The text processor is read from the registry; its vocab supplies both
     the vocabulary size and the embedding layer.
     """
     processor_key = self._datasets[0] + "_text_processor"
     self.text_processor = registry.get(processor_key)
     self.vocab = self.text_processor.vocab
     self.vocab_size = self.vocab.get_size()

     embedding_dim = self.config.embedding_dim
     self.word_embedding = self.vocab.get_embedding(
         torch.nn.Embedding, embedding_dim=embedding_dim)
     self.text_embeddings_out_dim = embedding_dim
Exemplo n.º 9
0
def get_mmf_root():
    """Return the MMF root folder, registering it on first use.

    If the registry has no ``mmf_root`` entry, the absolute path of the
    directory above this file's directory is computed, stored in the
    registry and returned.
    """
    from VisualBERT.mmf.common.registry import registry

    registered_root = registry.get("mmf_root", no_warning=True)
    if registered_root is not None:
        return registered_root

    this_dir = os.path.dirname(os.path.abspath(__file__))
    computed_root = os.path.abspath(os.path.join(this_dir, ".."))
    registry.register("mmf_root", computed_root)
    return computed_root
Exemplo n.º 10
0
    def _build_output(self):
        """Create the OCR pointer network, the classifier and answer processor."""
        classifier_config = self.config.classifier
        dataset_name = self._datasets[0]

        # Pointer network producing the dynamic OCR-copying scores
        self.ocr_ptr_net = OcrPtrNet(**classifier_config.ocr_ptr_net)

        # Scores over the fixed answer vocabulary: the OCR copying dimensions
        # are removed from the classifier output since OCR copying is handled
        # separately
        total_outputs = registry.get(dataset_name + "_num_final_outputs")
        fixed_vocab_outputs = total_outputs - classifier_config.ocr_max_num
        self.classifier = ClassifierLayer(
            classifier_config.type,
            in_dim=self.mmt_config.hidden_size,
            out_dim=fixed_vocab_outputs,
            **classifier_config.params,
        )

        self.answer_processor = registry.get(
            dataset_name + "_answer_processor")
Exemplo n.º 11
0
    def _init_classifier(self, combined_embedding_dim):
        """Create ``self.classifier`` for the first dataset's output size.

        Args:
            combined_embedding_dim: Input dimension of the classifier.
        """
        # TODO: Later support multihead
        outputs_key = self._datasets[0] + "_num_final_outputs"
        classifier_config = self.config.classifier

        self.classifier = ClassifierLayer(
            classifier_config.type,
            in_dim=combined_embedding_dim,
            out_dim=registry.get(outputs_key),
            **classifier_config.params,
        )
Exemplo n.º 12
0
    def __init__(self, loss_list):
        """Wrap every entry of ``loss_list`` in an ``MMFLoss``.

        Args:
            loss_list: Iterable of loss descriptions, each passed to
                ``MMFLoss`` unchanged.
        """
        super().__init__()
        self.losses = nn.ModuleList()

        # `evaluation.predict` from the global config, False when no config
        # is registered or the entry is missing
        global_config = registry.get("config")
        self._evaluation_predict = False
        if global_config:
            evaluation_config = global_config.get("evaluation", {})
            self._evaluation_predict = evaluation_config.get("predict", False)

        for loss_entry in loss_list:
            self.losses.append(MMFLoss(loss_entry))
def get_global_config(key=None):
    """Return the registered global config, or the node selected by ``key``.

    If no config is registered yet, one is built through ``Configuration``
    and stored in the registry under ``"config"``.

    Args:
        key (str, optional): Passed to ``OmegaConf.select`` when truthy.
            Defaults to None, which returns the whole config.
    """
    global_config = registry.get("config")
    if global_config is None:
        global_config = Configuration().get_config()
        registry.register("config", global_config)

    if not key:
        return global_config
    return OmegaConf.select(global_config, key)
Exemplo n.º 14
0
    def calculate(self, sample_list, model_output, *args, **kwargs):
        """Calculate vqa accuracy and return it back.

        Args:
            sample_list (SampleList): SampleList provided by DataLoader for
                                current iteration
            model_output (Dict): Dict returned by model.

        Returns:
            torch.FloatTensor: VQA Accuracy

        """
        scores = model_output["scores"]
        expected = sample_list["answers"]

        answer_processor = registry.get(sample_list.dataset_name +
                                        "_answer_processor")
        answer_space_size = answer_processor.get_true_vocab_size()

        scores = self._masked_unk_softmax(scores, 1, 0)
        predicted_ids = scores.argmax(dim=1).clone().tolist()
        per_sample_accuracy = []

        for idx, answer_id in enumerate(predicted_ids):
            if answer_id < answer_space_size:
                answer = answer_processor.idx2word(answer_id)
            else:
                # Ids past the vocabulary index into the context tokens
                answer_id -= answer_space_size
                answer = sample_list["context_tokens"][idx][answer_id]

            answer = self.evalai_answer_processor(answer)

            gt_texts = [
                self.evalai_answer_processor(x) for x in expected[idx]
            ]

            # For every ground truth answer, count the matches among the
            # *other* ground truth answers: the total number of matches
            # minus one if the held-out answer is itself a match
            total_matches = sum(1 for text in gt_texts if text == answer)
            gt_acc = []
            for text in gt_texts:
                matches_in_others = total_matches - (1 if text == answer else 0)
                gt_acc.append(min(1, float(matches_in_others) / 3))

            per_sample_accuracy.append(float(sum(gt_acc)) / len(gt_acc))

        mean_accuracy = float(sum(per_sample_accuracy)) / len(per_sample_accuracy)

        return model_output["scores"].new_tensor(mean_accuracy,
                                                 dtype=torch.float)
Exemplo n.º 15
0
    def _init_classifier(self, combined_embedding_dim: int):
        """Create ``self.classifier``, tolerating a missing ``params`` entry.

        Args:
            combined_embedding_dim (int): Input dimension of the classifier.
        """
        # TODO: Later support multihead
        num_choices = registry.get(self._datasets[0] + "_num_final_outputs")

        # Fall back to no extra keyword arguments when params is not set
        configured_params = self.config["classifier"].get("params")
        params = {} if configured_params is None else configured_params

        self.classifier = ClassifierLayer(
            self.config.classifier.type,
            in_dim=combined_embedding_dim,
            out_dim=num_choices,
            **params
        )
Exemplo n.º 16
0
    def test_init_processors(self):
        """Processors exist on the dataset and in the registry only after
        ``init_processors`` has been called."""
        config_path = os.path.join(
            os.path.abspath(__file__),
            "../../../mmf/configs/datasets/vqa2/defaults.yaml",
        )
        args = dummy_args()
        args.opts.append(f"config={config_path}")
        configuration = Configuration(args)

        # Point the answer processor at the vocab file under the data folder
        processors_config = configuration.get_config().dataset_config.vqa2.processors
        answer_processor = processors_config.answer_processor
        vocab_path = os.path.join(
            os.path.abspath(__file__), "..", "..", "data", "vocab.txt"
        )
        answer_processor.params.vocab_file = os.path.abspath(vocab_path)
        self._fix_configuration(configuration)
        configuration.freeze()

        base_dataset = BaseDataset(
            "vqa2", configuration.get_config().dataset_config.vqa2, "train"
        )
        expected_processors = [
            "answer_processor",
            "ocr_token_processor",
            "bbox_processor",
        ]

        # Before init_processors: nothing on the dataset, nothing registered
        already_set = [hasattr(base_dataset, key) for key in expected_processors]
        self.assertFalse(any(already_set))

        for processor in expected_processors:
            registry_key = "{}_{}".format("vqa2", processor)
            self.assertIsNone(registry.get(registry_key))

        # After init_processors: everything on the dataset and registered
        base_dataset.init_processors()
        now_set = [hasattr(base_dataset, key) for key in expected_processors]
        self.assertTrue(all(now_set))
        for processor in expected_processors:
            registry_key = "{}_{}".format("vqa2", processor)
            self.assertIsNotNone(registry.get(registry_key))
 def __init__(self,
              dataset_name,
              config,
              dataset_type="train",
              *args,
              **kwargs):
     """Store the dataset name, type and config, and detect the device.

     Args:
         dataset_name: Stored as ``self._dataset_name``.
         config: Dataset config; ``None`` is replaced by an empty dict.
         dataset_type: Stored as ``self._dataset_type``. Defaults to
             "train".
         *args: Accepted but not used here.
         **kwargs: Accepted but not used here.
     """
     super().__init__()
     self.config = {} if config is None else config
     self._dataset_name = dataset_name
     self._dataset_type = dataset_type
     self._global_config = registry.get("config")

     device = get_current_device()
     self._device = device
     self.use_cuda = "cuda" in str(device)
Exemplo n.º 18
0
    def forward(self, weighted_attn):
        """Run one language LSTM step and return the predictions.

        The LSTM state is read from the registry under a key built from the
        device of ``weighted_attn``; its ``lm_hidden`` entry is overwritten
        with the new state.
        """
        # Shared LSTM state for this device
        lstm_state = registry.get(f"{weighted_attn.device}_lstm_state")
        td_h, _ = lstm_state["td_hidden"]
        lm_h, lm_c = lstm_state["lm_hidden"]

        # Language LSTM
        lstm_input = torch.cat([weighted_attn, td_h], dim=1)
        lm_h, lm_c = self.language_lstm(lstm_input, (lm_h, lm_c))
        predictions = self.fc(self.dropout(lm_h))

        # Update hidden state for t+1
        lstm_state["lm_hidden"] = (lm_h, lm_c)

        return predictions
Exemplo n.º 19
0
    def forward(self, image_feat, embedding):
        """Run one top-down LSTM step and return the joint feature.

        The LSTM state is read from the registry under a key built from the
        device of ``image_feat``; its ``td_hidden`` entry is overwritten
        with the new state.
        """
        mean_image_feat = image_feat.mean(1)

        # Shared LSTM state for this device
        lstm_state = registry.get(f"{image_feat.device}_lstm_state")
        td_h, td_c = lstm_state["td_hidden"]
        lm_h, lm_c = lstm_state["lm_hidden"]

        lstm_input = torch.cat([lm_h, mean_image_feat, embedding], dim=1)
        td_h, td_c = self.top_down_lstm(lstm_input, (td_h, td_c))

        lstm_state["td_hidden"] = (td_h, td_c)

        projected_image = self.fa_image(image_feat)
        projected_hidden = self.fa_hidden(td_h)

        combined = projected_image + projected_hidden.unsqueeze(1)
        return self.dropout(self.relu(combined))
Exemplo n.º 20
0
def build_processors(
    processors_config: mmf_typings.DictConfig, registry_key: str = None, *args, **kwargs
) -> ProcessorDict:
    """Build the processors described by a config and return them by key.

    Args:
        processors_config (mmf_typings.DictConfig): OmegaConf DictConfig describing
            the parameters and type of each processor passed here

        registry_key (str, optional): If passed, ``.format`` is called on it
            with each processor key and the result is looked up in the
            registry; a registered processor is reused instead of building a
            new one. Defaults to None.

    Returns:
        ProcessorDict: Dictionary containing key to
            processor mapping
    """
    from VisualBERT.mmf.datasets.processors.processors import Processor

    built = {}

    for name, params in processors_config.items():
        # Entries with empty params are skipped entirely
        if not params:
            continue

        if registry_key is None:
            instance = None
        else:
            instance = registry.get(registry_key.format(name), no_warning=True)

        if instance is None:
            # We don't register back here as in case of hub interface, we
            # want the processors to be instantiated every time. BaseDataset
            # can register at its own end
            instance = Processor(params, *args, **kwargs)
        built[name] = instance

    return built
Exemplo n.º 21
0
    def calculate(self, sample_list, model_output, *args, **kwargs):
        """Decode predicted answers and score them with ``self.evaluator``.

        Predicted ids below the true vocab size are looked up in the answer
        vocab (decoding stops at ``EOS_IDX``); larger ids index into the
        sample's context tokens.

        Returns:
            The evaluator's result as a tensor on the device of
            ``sample_list.context_tokens``.
        """
        answer_processor = registry.get(sample_list.dataset_name +
                                        "_answer_processor")

        batch_size = sample_list.context_tokens.size(0)
        pred_answers = model_output["scores"].argmax(dim=-1)
        context_tokens = sample_list.context_tokens.cpu().numpy()
        answers = sample_list.get(self.gt_key).cpu().numpy()
        vocab_size = answer_processor.get_true_vocab_size()

        predictions = []
        from VisualBERT.mmf.utils.distributed import byte_tensor_to_object
        from VisualBERT.mmf.utils.text import word_tokenize

        for sample_idx in range(batch_size):
            tokens = byte_tensor_to_object(context_tokens[sample_idx])
            words = []
            for answer_id in pred_answers[sample_idx].tolist():
                if answer_id < vocab_size:
                    if answer_id == answer_processor.EOS_IDX:
                        break
                    words.append(
                        answer_processor.answer_vocab.idx2word(answer_id))
                else:
                    # Ids past the vocabulary copy a context token
                    token = tokens[answer_id - vocab_size]
                    words.append(word_tokenize(token))

            predictions.append({
                "pred_answer": " ".join(words).replace(" 's", "'s"),
                "gt_answers": byte_tensor_to_object(answers[sample_idx]),
            })

        accuracy = self.evaluator.eval_pred_list(predictions)
        return torch.tensor(accuracy).to(sample_list.context_tokens.device)
Exemplo n.º 22
0
 def __init__(self, config):
     """Store the config and derive the dataset names from the global config.

     ``self._datasets`` is the comma-split ``datasets`` string of the
     config registered under ``"config"``.
     """
     super().__init__(config)
     self.config = config
     global_config = registry.get("config")
     self._global_config = global_config
     self._datasets = global_config.datasets.split(",")
    def save(self, update, iteration=None, update_best=False):
        """Write a checkpoint of the trainer's current state.

        The checkpoint is always written to ``model_<update>.ckpt`` under
        ``self.models_foldername`` and to ``<ckpt_prefix>current.ckpt`` under
        ``self.ckpt_foldername``; with ``update_best`` it is also written to
        ``<ckpt_prefix>best.ckpt``. Nothing happens outside the master
        process.

        Args:
            update: Update number; used in the checkpoint file name and
                stored as ``num_updates``.
            iteration: Stored as ``current_iteration``. Falls back to
                ``update`` when falsy (``None`` or 0).
            update_best: Whether to also overwrite the best checkpoint.
        """
        # Only save in main process
        if not is_master():
            return

        if not iteration:
            iteration = update

        ckpt_filepath = os.path.join(self.models_foldername,
                                     "model_%d.ckpt" % update)
        best_ckpt_filepath = os.path.join(self.ckpt_foldername,
                                          self.ckpt_prefix + "best.ckpt")
        current_ckpt_filepath = os.path.join(self.ckpt_foldername,
                                             self.ckpt_prefix + "current.ckpt")

        # Best-so-far bookkeeping is read from the early stopping callback
        best_iteration = (self.trainer.early_stop_callback.early_stopping.
                          best_monitored_iteration)
        best_update = (self.trainer.early_stop_callback.early_stopping.
                       best_monitored_update)
        best_metric = (self.trainer.early_stop_callback.early_stopping.
                       best_monitored_value)
        model = self.trainer.model
        data_parallel = registry.get("data_parallel") or registry.get(
            "distributed")
        # The trainer may not have a scaler; then None is stored instead
        fp16_scaler = getattr(self.trainer, "scaler", None)
        fp16_scaler_dict = None

        if fp16_scaler is not None:
            fp16_scaler_dict = fp16_scaler.state_dict()

        # Unwrap only when the registry flag is exactly True, so the saved
        # state dict comes from the inner module
        if data_parallel is True:
            model = model.module

        ckpt = {
            "model": model.state_dict(),
            "optimizer": self.trainer.optimizer.state_dict(),
            "best_iteration": best_iteration,
            "current_iteration": iteration,
            "current_epoch": self.trainer.current_epoch,
            "num_updates": update,
            "best_update": best_update,
            "best_metric_value": best_metric,
            "fp16_scaler": fp16_scaler_dict,
            # Convert to container to avoid any dependencies
            "config": OmegaConf.to_container(self.config, resolve=True),
        }

        # The scheduler state is only stored when a scheduler exists
        lr_scheduler = self.trainer.lr_scheduler_callback._scheduler
        if lr_scheduler is not None:
            ckpt["lr_scheduler"] = lr_scheduler.state_dict()

        if self.git_repo:
            git_metadata_dict = self._get_vcs_fields()
            ckpt.update(git_metadata_dict)

        with PathManager.open(ckpt_filepath, "wb") as f:
            torch.save(ckpt, f)

        if update_best:
            with PathManager.open(best_ckpt_filepath, "wb") as f:
                torch.save(ckpt, f)

        # Save current always
        with PathManager.open(current_ckpt_filepath, "wb") as f:
            torch.save(ckpt, f)

        # Remove old checkpoints if max_to_keep is set
        if self.max_to_keep > 0:
            # Once the list is full, drop the oldest entry before recording
            # the new one
            if len(self.saved_iterations) == self.max_to_keep:
                self.remove(self.saved_iterations.pop(0))
            self.saved_iterations.append(update)
Exemplo n.º 24
0
 def __init__(self, config):
     """Build the ``BertConfig`` and read the dataset names.

     ``self._datasets`` is the comma-split ``datasets`` string of the
     config registered under ``"config"``.
     """
     super().__init__(config)
     self.mmt_config = BertConfig(**self.config.mmt)
     global_config = registry.get("config")
     self._datasets = global_config.datasets.split(",")
Exemplo n.º 25
0
 def _init_classifier(self):
     """Create the weight-normalised classifier for the first dataset."""
     num_hidden = self.config.text_embedding.num_hidden
     outputs_key = self._datasets[0] + "_num_final_outputs"
     num_choices = registry.get(outputs_key)
     dropout = self.config.classifier.dropout

     # The third positional argument is twice the hidden size
     self.classifier = WeightNormClassifier(
         num_hidden, num_choices, num_hidden * 2, dropout)
Exemplo n.º 26
0
 def _build_word_embedding(self):
     """Build a 300-dimensional word embedding from the first dataset's vocab."""
     assert len(self._datasets) > 0
     processor_key = self._datasets[0] + "_text_processor"
     vocab = registry.get(processor_key).vocab
     self.word_embedding = vocab.get_embedding(
         torch.nn.Embedding, embedding_dim=300)