Example #1
    def build(self):
        assert len(self._datasets) > 0
        num_question_choices = registry.get(
            _TEMPLATES["question_vocab_size"].format(self._datasets[0]))
        num_answer_choices = registry.get(
            _TEMPLATES["number_of_answers"].format(self._datasets[0]))

        self.text_embedding = nn.Embedding(
            num_question_choices, self.config.text_embedding.embedding_dim)
        self.lstm = nn.LSTM(**self.config.lstm)

        layers_config = self.config.cnn.layers
        conv_layers = []
        for in_dim, out_dim, kernel_size in zip(
            layers_config.input_dims,
            layers_config.output_dims,
            layers_config.kernel_sizes,
        ):
            conv_layers.append(ConvNet(in_dim, out_dim, kernel_size=kernel_size))
        conv_layers.append(Flatten())
        self.cnn = nn.Sequential(*conv_layers)

        # As we generate output dim dynamically, we need to copy the config
        # to update it
        classifier_config = deepcopy(self.config.classifier)
        classifier_config.params.out_dim = num_answer_choices
        self.classifier = ClassifierLayer(
            classifier_config.type, **classifier_config.params
        )
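
Every snippet on this page follows the same pattern: components register values or objects in a global registry under a string key, and consumers look them up later with registry.get. As a rough mental model, here is a minimal sketch of such a registry; it only mirrors the register/get/no_warning behavior seen in these examples and is not MMF's actual implementation.

class MiniRegistry:
    """Toy key-value registry mirroring the registry.register/get calls above."""

    def __init__(self):
        self._mapping = {}

    def register(self, name, obj):
        self._mapping[name] = obj

    def get(self, name, default=None, no_warning=False):
        value = self._mapping.get(name, default)
        if value is default and not no_warning:
            print(f"Key {name} is not found in registry")
        return value

registry = MiniRegistry()
registry.register("config", {"training": {"batch_size": 32}})
assert registry.get("config")["training"]["batch_size"] == 32
assert registry.get("missing_key", no_warning=True) is None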
Example #2
File: checkpoint.py  Project: hahaxun/mmf
    def upgrade_state_dict(self, state_dict):
        data_parallel = registry.get("data_parallel") or registry.get("distributed")
        data_parallel = data_parallel or isinstance(
            self.trainer.model,
            (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel),
        )
        if data_parallel:
            model = self.trainer.model.module
        else:
            model = self.trainer.model

        new_dict = {}
        for attr in state_dict:
            new_attr = model.format_state_key(attr)
            if not data_parallel and attr.startswith("module."):
                # In case the ckpt was actually a data parallel model
                # replace first module. from dataparallel with empty string
                new_attr = new_attr.replace("module.", "", 1)
            elif data_parallel and not attr.startswith("module."):
                new_attr = "module." + new_attr

            # Log if key has changed but not when the difference
            # is only due to data parallel's `module`
            if new_attr != attr and ("module." + new_attr != attr):
                logger.info(f"Will load key {new_attr} from {attr}")
            new_dict[new_attr] = state_dict[attr]
        return new_dict
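
The subtlety here is that checkpoints saved from a DataParallel or DistributedDataParallel model prefix every key with "module.", while a plain model expects unprefixed keys (and vice versa). A toy illustration of the repair for the non-parallel case, with hypothetical keys:

ckpt_keys = ["module.encoder.weight", "module.decoder.bias"]
data_parallel = False  # loading into a plain, unwrapped model
fixed = [
    key.replace("module.", "", 1)  # strip only the leading "module."
    if not data_parallel and key.startswith("module.")
    else key
    for key in ckpt_keys
]
print(fixed)  # ['encoder.weight', 'decoder.bias']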
Example #3
File: env.py  Project: hahaxun/mmf
def setup_imports():
    from multimodelity.common.registry import registry

    # First, check if imports are already setup
    has_already_setup = registry.get("imports_setup", no_warning=True)
    if has_already_setup:
        return
    # Automatically load all of the modules, so that
    # they register with registry
    root_folder = registry.get("multimodelity_root", no_warning=True)

    if root_folder is None:
        root_folder = os.path.dirname(os.path.abspath(__file__))
        root_folder = os.path.join(root_folder, "..")

        environment_multimodelity_path = os.environ.get(
            "multimodelity_PATH", os.environ.get("PYTHIA_PATH"))

        if environment_multimodelity_path is not None:
            root_folder = environment_multimodelity_path

        registry.register("pythia_path", root_folder)
        registry.register("multimodelity_path", root_folder)

    trainer_folder = os.path.join(root_folder, "trainers")
    trainer_pattern = os.path.join(trainer_folder, "**", "*.py")
    datasets_folder = os.path.join(root_folder, "datasets")
    datasets_pattern = os.path.join(datasets_folder, "**", "*.py")
    model_folder = os.path.join(root_folder, "models")
    model_pattern = os.path.join(model_folder, "**", "*.py")

    importlib.import_module("multimodelity.common.meter")

    files = (glob.glob(datasets_pattern, recursive=True) +
             glob.glob(model_pattern, recursive=True) +
             glob.glob(trainer_pattern, recursive=True))

    for f in files:
        f = os.path.realpath(f)
        if f.endswith(".py") and not f.endswith("__init__.py"):
            splits = f.split(os.sep)
            import_prefix_index = 0
            for idx, split in enumerate(splits):
                if split == "multimodelity":
                    import_prefix_index = idx + 1
            file_name = splits[-1]
            module_name = file_name[:file_name.find(".py")]
            module = ".".join(["multimodelity"] +
                              splits[import_prefix_index:-1] + [module_name])
            importlib.import_module(module)

    registry.register("imports_setup", True)
Example #4
    def __init__(self, multi_task_instance):
        self.test_task = multi_task_instance
        self.task_type = multi_task_instance.dataset_type
        self.config = registry.get("config")
        self.report = []
        self.timer = Timer()
        self.training_config = self.config.training
        self.num_workers = self.training_config.num_workers
        self.batch_size = self.training_config.batch_size
        self.report_folder_arg = get_multimodelity_env(key="report_dir")
        self.experiment_name = self.training_config.experiment_name

        self.datasets = []

        for dataset in self.test_task.get_datasets():
            self.datasets.append(dataset)

        self.current_dataset_idx = -1
        self.current_dataset = self.datasets[self.current_dataset_idx]

        self.save_dir = get_multimodelity_env(key="save_dir")
        self.report_folder = ckpt_name_from_core_args(self.config)
        self.report_folder += foldername_from_config_override(self.config)

        self.report_folder = os.path.join(self.save_dir, self.report_folder)
        self.report_folder = os.path.join(self.report_folder, "reports")

        if self.report_folder_arg:
            self.report_folder = self.report_folder_arg

        PathManager.mkdirs(self.report_folder)
Example #5
File: metrics.py  Project: hahaxun/mmf
    def calculate(self, sample_list, model_output, *args, **kwargs):
        answer_processor = registry.get(sample_list.dataset_name + "_answer_processor")

        batch_size = sample_list.context_tokens.size(0)
        pred_answers = model_output["scores"].argmax(dim=-1)
        context_tokens = sample_list.context_tokens.cpu().numpy()
        answers = sample_list.get(self.gt_key).cpu().numpy()
        answer_space_size = answer_processor.get_true_vocab_size()

        predictions = []
        from multimodelity.utils.distributed import byte_tensor_to_object
        from multimodelity.utils.text import word_tokenize

        for idx in range(batch_size):
            tokens = byte_tensor_to_object(context_tokens[idx])
            answer_words = []
            for answer_id in pred_answers[idx].tolist():
                if answer_id >= answer_space_size:
                    answer_id -= answer_space_size
                    answer_words.append(word_tokenize(tokens[answer_id]))
                else:
                    if answer_id == answer_processor.EOS_IDX:
                        break
                    answer_words.append(
                        answer_processor.answer_vocab.idx2word(answer_id)
                    )

            pred_answer = " ".join(answer_words).replace(" 's", "'s")
            gt_answers = byte_tensor_to_object(answers[idx])
            predictions.append({"pred_answer": pred_answer, "gt_answers": gt_answers})

        accuracy = self.evaluator.eval_pred_list(predictions)
        accuracy = torch.tensor(accuracy).to(sample_list.context_tokens.device)

        return accuracy
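
The inner loop implements a copy mechanism: ids below the fixed answer-vocabulary size are decoded through the vocabulary, while ids at or above it point into that sample's OCR tokens. A toy illustration with hypothetical numbers:

answer_space_size = 5000  # hypothetical fixed-vocab size
ocr_tokens = ["total", "$12.99", "thanks"]  # hypothetical OCR tokens for one sample
for answer_id in [42, 5001]:
    if answer_id >= answer_space_size:
        print("copied OCR token:", ocr_tokens[answer_id - answer_space_size])
    else:
        print("fixed-vocab id:", answer_id)  # decoded via answer_vocab.idx2word
# fixed-vocab id: 42
# copied OCR token: $12.99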
Example #6
File: metrics.py  Project: hahaxun/mmf
    def __init__(self):
        import nltk.translate.bleu_score as bleu_score

        self._bleu_score = bleu_score
        super().__init__("caption_bleu4")
        self.caption_processor = registry.get("coco_caption_processor")
        self.required_params = ["scores", "answers", "captions"]
Example #7
    def __init__(self, optimizer, *args, **kwargs):
        from multimodelity.utils.general import lr_lambda_update

        self._lambda_func = lr_lambda_update
        self._global_config = registry.get("config")

        super().__init__(optimizer, self.lr_lambda, *args, **kwargs)
Example #8
def log_progress(info: Union[Dict, Any], log_format="simple"):
    """Useful for logging progress dict.

    Args:
        info (dict|any): If dict, will be logged as key value pair. Otherwise,
            it will be logged directly.

        log_format (str, optional): "json" or "simple". Defaults to "simple".
    """
    caller, key = _find_caller()
    logger = logging.getLogger(caller)

    if not isinstance(info, collections.abc.Mapping):
        logger.info(info)
        return

    if log_format == "simple":
        config = registry.get("config")
        if config:
            log_format = config.training.log_format

    if log_format == "simple":
        output = ", ".join([f"{key}: {value}" for key, value in info.items()])
    elif log_format == "json":
        output = json.dumps(info)
    else:
        output = str(info)

    logger.info(output)
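
A hedged usage sketch: under the default "simple" format a progress dict is flattened into a comma-separated "key: value" string, so a call like the following (values hypothetical) logs "progress: 100/2000, loss: 0.421, lr: 0.0001":

log_progress({"progress": "100/2000", "loss": 0.421, "lr": 0.0001})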
Example #9
File: lxmert.py  Project: hahaxun/mmf
    def forward(self, sample_list):
        device = registry.get("config").training.device
        params = self.get_image_and_text_features(sample_list, device)
        if params["visual_feats"] is not None and params["image_dim"] is not None:
            device = params["visual_feats"].device
            # Mask is 1 for real image features and 0 for padding, based on
            # the true number of features (image_dim) in each sample
            image_mask = (
                torch.arange(params["visual_feats"].size(-2))
                .expand(*params["visual_feats"].size()[:-1])
                .to(device)
            )
            if len(params["image_dim"].size()) < len(image_mask.size()):
                params["image_dim"] = params["image_dim"].unsqueeze(-1)
                assert len(params["image_dim"].size()) == len(image_mask.size())
            image_mask = image_mask < params["image_dim"]
            params["image_attention_mask"] = image_mask.long()
        else:
            params["image_attention_mask"] = None

        if self.config.training_head_type == "pretraining":
            output_dict = self.model(
                input_ids=params["input_ids"],
                token_type_ids=params["token_type_ids"],
                attention_mask=params["attention_mask"],
                visual_feats=params["visual_feats"],
                visual_pos=params["pos"],
                visual_attention_mask=params["image_attention_mask"],
                masked_lm_labels=params["masked_lm_labels"],
                masked_image_labels=params["masked_image_labels"],
                obj_labels=params["obj_labels"],
                matched_label=params["matched_label"],
                ans=params["ans"],
                num_features=params["max_features"],
                name=params["dataset_name"],
            )
            loss_key = f"{sample_list.dataset_name}/{sample_list.dataset_type}"
            output_dict["losses"] = {}
            # Namespace each pretraining loss as "<dataset>/<split>/<loss>"
            for loss_name in (
                "masked_lm_loss",
                "matched_loss",
                "visn_loss",
                "answer_loss",
            ):
                if loss_name in output_dict:
                    output_dict["losses"][f"{loss_key}/{loss_name}"] = output_dict.pop(
                        loss_name
                    )
        else:
            output_dict = self.model(
                input_ids=params["input_ids"],
                token_type_ids=params["token_type_ids"],
                attention_mask=params["attention_mask"],
                visual_feats=params["visual_feats"],
                visual_pos=params["pos"],
                visual_attention_mask=params["image_attention_mask"],
            )
        return output_dict
Example #10
File: butd.py  Project: hahaxun/mmf
    def _build_word_embedding(self):
        self.text_processor = registry.get(self._datasets[0] + "_text_processor")
        self.vocab = self.text_processor.vocab
        self.vocab_size = self.vocab.get_size()
        self.word_embedding = self.vocab.get_embedding(
            torch.nn.Embedding, embedding_dim=self.config.embedding_dim
        )
        self.text_embeddings_out_dim = self.config.embedding_dim
Example #11
File: m4c.py  Project: hahaxun/mmf
    def _build_output(self):
        # dynamic OCR-copying scores with pointer network
        self.ocr_ptr_net = OcrPtrNet(**self.config.classifier.ocr_ptr_net)

        # fixed answer vocabulary scores
        num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
        # remove the OCR copying dimensions in LoRRA's classifier output
        # (OCR copying will be handled separately)
        num_choices -= self.config.classifier.ocr_max_num
        self.classifier = ClassifierLayer(
            self.config.classifier.type,
            in_dim=self.mmt_config.hidden_size,
            out_dim=num_choices,
            **self.config.classifier.params,
        )

        self.answer_processor = registry.get(self._datasets[0] + "_answer_processor")
Example #12
def get_multimodelity_root():
    from multimodelity.common.registry import registry

    multimodelity_root = registry.get("multimodelity_root", no_warning=True)
    if multimodelity_root is None:
        multimodelity_root = os.path.dirname(os.path.abspath(__file__))
        multimodelity_root = os.path.abspath(os.path.join(multimodelity_root, ".."))
        registry.register("multimodelity_root", multimodelity_root)
    return multimodelity_root
Example #13
File: pythia.py  Project: hahaxun/mmf
    def _init_classifier(self, combined_embedding_dim):
        # TODO: Later support multihead
        num_choices = registry.get(self._datasets[0] + "_num_final_outputs")

        self.classifier = ClassifierLayer(
            self.config.classifier.type,
            in_dim=combined_embedding_dim,
            out_dim=num_choices,
            **self.config.classifier.params,
        )
Example #14
    def __init__(self, loss_list):
        super().__init__()
        self.losses = nn.ModuleList()
        config = registry.get("config")
        self._evaluation_predict = False
        if config:
            self._evaluation_predict = config.get("evaluation", {}).get(
                "predict", False
            )

        for loss in loss_list:
            self.losses.append(multimodelityLoss(loss))
Example #15
def get_global_config(key=None):
    config = registry.get("config")
    if config is None:
        configuration = Configuration()
        config = configuration.get_config()
        registry.register("config", config)

    if key:
        config = OmegaConf.select(config, key)

    return config
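
OmegaConf.select resolves a dotted key path against a config and returns None when the path does not exist, which is what makes the optional key argument safe here. A self-contained sketch of that lookup behavior (config contents hypothetical):

from omegaconf import OmegaConf

config = OmegaConf.create({"training": {"batch_size": 32}})
print(OmegaConf.select(config, "training.batch_size"))  # 32
print(OmegaConf.select(config, "training.missing_key"))  # None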
Example #16
File: movie_mcan.py  Project: hahaxun/mmf
    def _init_classifier(self, combined_embedding_dim: int):
        # TODO: Later support multihead
        num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
        params = self.config["classifier"].get("params")
        if params is None:
            params = {}

        self.classifier = ClassifierLayer(
            self.config.classifier.type,
            in_dim=combined_embedding_dim,
            out_dim=num_choices,
            **params
        )
Example #17
    def test_init_processors(self):
        path = os.path.join(
            os.path.abspath(__file__),
            "../../../multimodelity/configs/datasets/vqa2/defaults.yaml",
        )
        args = dummy_args()
        args.opts.append(f"config={path}")
        configuration = Configuration(args)
        answer_processor = (
            configuration.get_config().dataset_config.vqa2.processors.answer_processor
        )
        vocab_path = os.path.join(
            os.path.abspath(__file__), "..", "..", "data", "vocab.txt"
        )
        answer_processor.params.vocab_file = os.path.abspath(vocab_path)
        self._fix_configuration(configuration)
        configuration.freeze()

        base_dataset = BaseDataset(
            "vqa2", configuration.get_config().dataset_config.vqa2, "train"
        )
        expected_processors = [
            "answer_processor",
            "ocr_token_processor",
            "bbox_processor",
        ]

        # Check no processors are initialized before init_processors call
        self.assertFalse(
            any(hasattr(base_dataset, key) for key in expected_processors))

        for processor in expected_processors:
            self.assertIsNone(registry.get("{}_{}".format("vqa2", processor)))

        # Check processors are initialized after init_processors
        base_dataset.init_processors()
        self.assertTrue(
            all(hasattr(base_dataset, key) for key in expected_processors))
        for processor in expected_processors:
            self.assertIsNotNone(
                registry.get("{}_{}".format("vqa2", processor)))
Example #18
    def forward(self, weighted_attn):
        # Get LSTM state
        state = registry.get(f"{weighted_attn.device}_lstm_state")
        h1, c1 = state["td_hidden"]
        h2, c2 = state["lm_hidden"]

        # Language LSTM
        h2, c2 = self.language_lstm(torch.cat([weighted_attn, h1], dim=1),
                                    (h2, c2))
        predictions = self.fc(self.dropout(h2))

        # Update hidden state for t+1
        state["lm_hidden"] = (h2, c2)

        return predictions
Example #19
File: metrics.py  Project: hahaxun/mmf
    def calculate(self, sample_list, model_output, *args, **kwargs):
        """Calculate vqa accuracy and return it back.

        Args:
            sample_list (SampleList): SampleList provided by DataLoader for
                                current iteration
            model_output (Dict): Dict returned by model.

        Returns:
            torch.FloatTensor: VQA Accuracy

        """
        output = model_output["scores"]
        expected = sample_list["answers"]

        answer_processor = registry.get(sample_list.dataset_name + "_answer_processor")
        answer_space_size = answer_processor.get_true_vocab_size()

        output = self._masked_unk_softmax(output, 1, 0)
        output = output.argmax(dim=1).clone().tolist()
        accuracy = []

        for idx, answer_id in enumerate(output):
            if answer_id >= answer_space_size:
                answer_id -= answer_space_size
                answer = sample_list["context_tokens"][idx][answer_id]
            else:
                answer = answer_processor.idx2word(answer_id)

            answer = self.evalai_answer_processor(answer)

            gt_answers = [self.evalai_answer_processor(x) for x in expected[idx]]
            gt_answers = list(enumerate(gt_answers))

            gt_acc = []
            for gt_answer in gt_answers:
                other_answers = [item for item in gt_answers if item != gt_answer]
                matching_answers = [item for item in other_answers if item[1] == answer]
                acc = min(1, float(len(matching_answers)) / 3)
                gt_acc.append(acc)
            avgGTAcc = float(sum(gt_acc)) / len(gt_acc)
            accuracy.append(avgGTAcc)

        accuracy = float(sum(accuracy)) / len(accuracy)

        return model_output["scores"].new_tensor(accuracy, dtype=torch.float)
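
This is the standard VQA accuracy: the prediction scores min(1, matches / 3) against each leave-one-out subset of the (typically 10) human answers, and the per-subset scores are averaged, which is exactly what the gt_acc loop above computes. A toy worked example with hypothetical annotations:

gt_answers = ["red"] * 8 + ["maroon", "dark red"]
pred = "red"
per_subset = []
for i in range(len(gt_answers)):
    others = gt_answers[:i] + gt_answers[i + 1:]
    per_subset.append(min(1.0, others.count(pred) / 3))
print(sum(per_subset) / len(per_subset))  # 1.0: every subset keeps >= 3 "red" votes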
Example #20
File: layers.py  Project: hahaxun/mmf
    def forward(self, image_feat, embedding):
        image_feat_mean = image_feat.mean(1)

        # Get LSTM state
        state = registry.get(f"{image_feat.device}_lstm_state")
        h1, c1 = state["td_hidden"]
        h2, c2 = state["lm_hidden"]

        h1, c1 = self.top_down_lstm(
            torch.cat([h2, image_feat_mean, embedding], dim=1), (h1, c1))

        state["td_hidden"] = (h1, c1)

        image_fa = self.fa_image(image_feat)
        hidden_fa = self.fa_hidden(h1)

        joint_feature = self.relu(image_fa + hidden_fa.unsqueeze(1))
        joint_feature = self.dropout(joint_feature)

        return joint_feature
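
Examples #18 and #20 both read decoder state from a registry key derived from a tensor's device, so each GPU keeps its own LSTM state under data-parallel execution. A hedged sketch of how such state might be seeded before decoding begins (key format taken from the snippets; the hidden size and batch shape here are hypothetical):

import torch

from multimodelity.common.registry import registry

hidden_size = 1024  # hypothetical
device = torch.device("cpu")
zeros = torch.zeros(1, hidden_size, device=device)
registry.register(
    f"{device}_lstm_state",
    {
        "td_hidden": (zeros.clone(), zeros.clone()),
        "lm_hidden": (zeros.clone(), zeros.clone()),
    },
)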
Example #21
    def load(self):
        # Set run type
        self.run_type = self.config.get("run_type", "train")

        # Print configuration
        configuration = registry.get("configuration", no_warning=True)
        if configuration:
            configuration.pretty_print()

        # Configure device and cudnn deterministic
        self.configure_device()
        self.configure_seed()

        # Load dataset, model, optimizer and metrics
        self.load_datasets()
        self.load_model()
        self.load_optimizer()
        self.load_metrics()

        # Initialize Callbacks
        self.configure_callbacks()
Example #22
def build_processors(
    processors_config: multimodelity_typings.DictConfig,
    registry_key: str = None,
    *args,
    **kwargs,
) -> ProcessorDict:
    """Given a processor config, builds the processors present and returns back
    a dict containing processors mapped to keys as per the config

    Args:
        processors_config (multimodelity_typings.DictConfig): OmegaConf DictConfig describing
            the parameters and type of each processor passed here

        registry_key (str, optional): If passed, function would look into registry for
            this particular key and return it back. .format with processor_key will
            be called on this string. Defaults to None.

    Returns:
        ProcessorDict: Dictionary containing key to
            processor mapping
    """
    from multimodelity.datasets.processors.processors import Processor

    processor_dict = {}

    for processor_key, processor_params in processors_config.items():
        if not processor_params:
            continue

        processor_instance = None
        if registry_key is not None:
            full_key = registry_key.format(processor_key)
            processor_instance = registry.get(full_key, no_warning=True)

        if processor_instance is None:
            processor_instance = Processor(processor_params, *args, **kwargs)
            # We don't register back here because, in the case of the hub
            # interface, we want the processors to be instantiated every
            # time. BaseDataset can register at its own end
        processor_dict[processor_key] = processor_instance

    return processor_dict
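
A hedged usage sketch, reusing the "<dataset>_<processor>" registry-key convention visible in Example #17; the dataset_config variable is hypothetical:

processors = build_processors(dataset_config.processors, registry_key="vqa2_{}")
answer_processor = processors["answer_processor"]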
Example #23
File: movie_mcan.py  Project: hahaxun/mmf
    def _build_word_embedding(self):
        assert len(self._datasets) > 0
        text_processor = registry.get(self._datasets[0] + "_text_processor")
        vocab = text_processor.vocab
        self.word_embedding = vocab.get_embedding(torch.nn.Embedding, embedding_dim=300)
Example #24
File: movie_mcan.py  Project: hahaxun/mmf
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self._global_config = registry.get("config")
        self._datasets = self._global_config.datasets.split(",")
Example #25
    def _init_classifier(self):
        num_hidden = self.config.text_embedding.num_hidden
        num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
        dropout = self.config.classifier.dropout
        self.classifier = WeightNormClassifier(
            num_hidden, num_choices, num_hidden * 2, dropout
        )
Example #26
File: checkpoint.py  Project: hahaxun/mmf
    def save(self, update, iteration=None, update_best=False):
        # Only save in main process
        if not is_master():
            return

        if not iteration:
            iteration = update

        ckpt_filepath = os.path.join(
            self.models_foldername, "model_%d.ckpt" % update
        )
        best_ckpt_filepath = os.path.join(
            self.ckpt_foldername, self.ckpt_prefix + "best.ckpt"
        )
        current_ckpt_filepath = os.path.join(
            self.ckpt_foldername, self.ckpt_prefix + "current.ckpt"
        )

        early_stopping = self.trainer.early_stop_callback.early_stopping
        best_iteration = early_stopping.best_monitored_iteration
        best_update = early_stopping.best_monitored_update
        best_metric = early_stopping.best_monitored_value
        model = self.trainer.model
        data_parallel = registry.get("data_parallel") or registry.get("distributed")
        fp16_scaler = getattr(self.trainer, "scaler", None)
        fp16_scaler_dict = None

        if fp16_scaler is not None:
            fp16_scaler_dict = fp16_scaler.state_dict()

        if data_parallel is True:
            model = model.module

        ckpt = {
            "model": model.state_dict(),
            "optimizer": self.trainer.optimizer.state_dict(),
            "best_iteration": best_iteration,
            "current_iteration": iteration,
            "current_epoch": self.trainer.current_epoch,
            "num_updates": update,
            "best_update": best_update,
            "best_metric_value": best_metric,
            "fp16_scaler": fp16_scaler_dict,
            # Convert to container to avoid any dependencies
            "config": OmegaConf.to_container(self.config, resolve=True),
        }

        lr_scheduler = self.trainer.lr_scheduler_callback._scheduler
        if lr_scheduler is not None:
            ckpt["lr_scheduler"] = lr_scheduler.state_dict()

        if self.git_repo:
            git_metadata_dict = self._get_vcs_fields()
            ckpt.update(git_metadata_dict)

        with PathManager.open(ckpt_filepath, "wb") as f:
            torch.save(ckpt, f)

        if update_best:
            with PathManager.open(best_ckpt_filepath, "wb") as f:
                torch.save(ckpt, f)

        # Save current always
        with PathManager.open(current_ckpt_filepath, "wb") as f:
            torch.save(ckpt, f)

        # Remove old checkpoints if max_to_keep is set
        if self.max_to_keep > 0:
            if len(self.saved_iterations) == self.max_to_keep:
                self.remove(self.saved_iterations.pop(0))
            self.saved_iterations.append(update)
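
The bookkeeping at the end implements a fixed-size rotation: once max_to_keep checkpoints exist, the oldest is removed before the new update is recorded. A toy illustration with hypothetical update numbers:

saved_iterations, max_to_keep = [], 2
for update in [1000, 2000, 3000]:
    if len(saved_iterations) == max_to_keep:
        oldest = saved_iterations.pop(0)  # model_1000.ckpt would be deleted
    saved_iterations.append(update)
print(saved_iterations)  # [2000, 3000]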
Example #27
File: m4c.py  Project: hahaxun/mmf
    def __init__(self, config):
        super().__init__(config)
        self.mmt_config = BertConfig(**self.config.mmt)
        self._datasets = registry.get("config").datasets.split(",")