Example #1
def main(configuration, init_distributed=False, predict=False):
    # A reload might be needed for imports
    setup_imports()
    configuration.import_user_dir()
    config = configuration.get_config()

    if torch.cuda.is_available():
        torch.cuda.set_device(config.device_id)
        torch.cuda.init()

    if init_distributed:
        distributed_init(config)

    config.training.seed = set_seed(config.training.seed)
    registry.register("seed", config.training.seed)
    print(f"Using seed {config.training.seed}")

    config = build_config(configuration)

    # Logger should be registered after config is registered
    registry.register("writer", Logger(config, name="mmf.train"))
    trainer = build_trainer(config)
    trainer.load()
    if predict:
        trainer.inference()
    else:
        trainer.train()
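
main() registers the seed and a writer in MMF's global registry; a minimal sketch of how downstream code can read such a value back, using only the registry API visible in these examples:

from mmf.common.registry import registry

# "seed" was registered by main() via registry.register("seed", ...)
seed = registry.get("seed")
print(f"Seed from registry: {seed}")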
Example #2
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "vilbert"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        model_class = registry.get_model_class(model_name)
        self.vision_feature_size = 1024
        self.vision_target_size = 1279
        config.model_config[model_name]["training_head_type"] = "pretraining"
        config.model_config[model_name][
            "visual_embedding_dim"] = self.vision_feature_size
        config.model_config[model_name][
            "v_feature_size"] = self.vision_feature_size
        config.model_config[model_name][
            "v_target_size"] = self.vision_target_size
        config.model_config[model_name]["dynamic_attention"] = False
        self.pretrain_model = model_class(config.model_config[model_name])
        self.pretrain_model.build()
        config.model_config[model_name][
            "training_head_type"] = "classification"
        config.model_config[model_name]["num_labels"] = 2
        self.finetune_model = model_class(config.model_config[model_name])
        self.finetune_model.build()
Example #3
def get_model(device, opts):
    # Local imports keep the helper self-contained; argparse, torch, and
    # Logger are added because the body below uses them (Logger is assumed
    # importable from mmf.utils.logger, as in older MMF versions).
    import argparse

    import torch

    from mmf.common.registry import registry
    from mmf.utils.build import build_config, build_trainer
    from mmf.utils.configuration import Configuration
    from mmf.utils.env import set_seed, setup_imports
    from mmf.utils.logger import Logger
    args = argparse.Namespace(config_override=None)
    args.opts = opts
    configuration = Configuration(args)
    configuration.args = args
    config = configuration.get_config()
    config.start_rank = 0
    config.device_id = 0
    setup_imports()
    configuration.import_user_dir()
    config = configuration.get_config()

    if torch.cuda.is_available():
        torch.cuda.set_device(config.device_id)
        torch.cuda.init()

    config.training.seed = set_seed(config.training.seed)
    registry.register("seed", config.training.seed)

    config = build_config(configuration)

    # Logger should be registered after config is registered
    registry.register("writer", Logger(config, name="mmf.train"))
    trainer = build_trainer(config)
    # trainer.load() is deliberately skipped; ready_trainer is assumed to be
    # a project-specific helper that performs the minimal setup instead.
    ready_trainer(trainer)
    trainer.model.to(device)
    return trainer.model
Example #4
def main(configuration, init_distributed=False, predict=False):
    # A reload might be needed for imports
    setup_imports()
    configuration.import_user_dir()
    config = configuration.get_config()

    if torch.cuda.is_available():
        torch.cuda.set_device(config.device_id)
        torch.cuda.init()

    if init_distributed:
        distributed_init(config)

    # Offset the base seed by the process rank so each distributed worker
    # seeds differently; -1 is passed through so set_seed picks a random seed.
    seed = config.training.seed
    config.training.seed = set_seed(seed if seed == -1 else seed + get_rank())
    registry.register("seed", config.training.seed)

    config = build_config(configuration)

    setup_logger(color=config.training.colored_logs,
                 disable=config.training.should_not_log)
    logger = logging.getLogger("mmf_cli.run")
    # Log args for debugging purposes
    logger.info(configuration.args)
    logger.info(f"Torch version: {torch.__version__}")
    log_device_names()
    logger.info(f"Using seed {config.training.seed}")

    trainer = build_trainer(config)
    trainer.load()
    if predict:
        trainer.inference()
    else:
        trainer.train()
Example #5
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "vinvl"
        args = test_utils.dummy_args(model=model_name, dataset="test")
        configuration = Configuration(args)
        config = configuration.get_config()
        model_config = config.model_config[model_name]
        model_config.model = model_name
        model_config.do_pretraining = False
        classification_config_dict = {
            "do_pretraining": False,
            "heads": {"mlp": {"num_labels": 3129}},
            "ce_loss": {"ignore_index": -1},
        }
        self.classification_config = OmegaConf.create(
            {**model_config, **classification_config_dict}
        )

        pretraining_config_dict = {
            "do_pretraining": True,
            "heads": {"mlm": {"hidden_size": 768}},
        }
        self.pretraining_config = OmegaConf.create(
            {**model_config, **pretraining_config_dict}
        )

        self.sample_list = self._get_sample_list()
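
Example #5 (like #13 below) derives variant configs by dict-unpacking a base DictConfig together with a plain override dict; a self-contained sketch of that merge pattern, with illustrative values:

from omegaconf import OmegaConf

base = OmegaConf.create({"do_pretraining": False, "heads": {}})
override = {"do_pretraining": True, "heads": {"mlm": {"hidden_size": 768}}}
# DictConfig is a Mapping, so ** unpacking merges it with the override dict
merged = OmegaConf.create({**base, **override})
assert merged.do_pretraining is True
assert merged.heads.mlm.hidden_size == 768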
Example #6
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        self.model_name = "mmf_transformer"
        args = test_utils.dummy_args(model=self.model_name)
        configuration = Configuration(args)
        self.config = configuration.get_config()
        self.config.model_config[self.model_name].model = self.model_name
Example #7
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "vilt"
        args = test_utils.dummy_args(model=model_name, dataset="test")
        configuration = Configuration(args)
        config = configuration.get_config()
        model_config = config.model_config[model_name]
        model_config.model = model_name
        self.pretrain_model = build_model(model_config)
Example #8
    def setUp(self):
        setup_imports()
        self.model_name = "mmf_transformer"
        args = test_utils.dummy_args(model=self.model_name)
        configuration = Configuration(args)
        self.config = configuration.get_config()
        self.model_class = registry.get_model_class(self.model_name)
        self.finetune_model = self.model_class(
            self.config.model_config[self.model_name])
        self.finetune_model.build()
Example #9
def run(opts: typing.Optional[typing.List[str]] = None, predict: bool = False):
    """Run starts a job based on the command passed from the command line.
    You can optionally run the mmf job programmatically by passing an optlist as opts.

    Args:
        opts (typing.Optional[typing.List[str]], optional): Optlist which can be
            used to override opts programmatically. For example, if you pass
            opts = ["training.batch_size=64", "checkpoint.resume=True"], this will
            set the batch size to 64 and resume from the checkpoint if present.
            Defaults to None.
        predict (bool, optional): If predict is passed True, then the program runs in
            prediction mode. Defaults to False.
    """
    setup_imports()

    if opts is None:
        parser = flags.get_parser()
        args = parser.parse_args()
    else:
        args = argparse.Namespace(config_override=None)
        args.opts = opts

    print(args)
    configuration = Configuration(args)
    # Set runtime args which can be changed by MMF
    configuration.args = args
    config = configuration.get_config()
    config.start_rank = 0
    if config.distributed.init_method is None:
        infer_init_method(config)

    if config.distributed.init_method is not None:
        if torch.cuda.device_count() > 1 and not config.distributed.no_spawn:
            config.start_rank = config.distributed.rank
            config.distributed.rank = None
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(configuration, predict),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(0, configuration, predict)
    elif config.distributed.world_size > 1:
        assert config.distributed.world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        config.distributed.init_method = f"tcp://localhost:{port}"
        config.distributed.rank = None
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(configuration, predict),
            nprocs=config.distributed.world_size,
        )
    else:
        config.device_id = 0
        main(configuration, predict=predict)
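
The docstring above describes the programmatic entry point; a minimal usage sketch based on it, assuming run is importable from mmf_cli.run as in MMF:

from mmf_cli.run import run

# Override options exactly as described in the docstring
run(opts=["training.batch_size=64", "checkpoint.resume=True"])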
Example #10
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        replace_with_jit()
        model_name = "visual_bert"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        model_config = config.model_config[model_name]
        model_config.model = model_name
        self.pretrain_model = build_model(model_config)
Example #11
    def setUp(self):
        setup_imports()
        model_name = "mmbt"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        model_class = registry.get_model_class(model_name)
        config.model_config[model_name]["training_head_type"] = "classification"
        config.model_config[model_name]["num_labels"] = 2
        self.finetune_model = model_class(config.model_config[model_name])
        self.finetune_model.build()
Example #12
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "mmbt"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        model_config = config.model_config[model_name]
        model_config["training_head_type"] = "classification"
        model_config["num_labels"] = 2
        model_config.model = model_name
        self.finetune_model = build_model(model_config)
Example #13
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "uniter"
        args = test_utils.dummy_args(model=model_name, dataset="vqa2")
        configuration = Configuration(args)
        config = configuration.get_config()
        model_config = config.model_config[model_name]
        model_config.model = model_name
        model_config.losses = {"vqa2": "logit_bce"}
        model_config.do_pretraining = False
        model_config.tasks = "vqa2"
        classification_config_dict = {
            "do_pretraining": False,
            "tasks": "vqa2",
            "heads": {
                "vqa2": {
                    "type": "mlp",
                    "num_labels": 3129
                }
            },
            "losses": {
                "vqa2": "logit_bce"
            },
        }
        classification_config = OmegaConf.create({
            **model_config,
            **classification_config_dict
        })

        pretraining_config_dict = {
            "do_pretraining": True,
            "tasks": "wra",
            "heads": {
                "wra": {
                    "type": "wra"
                }
            },
        }
        pretraining_config = OmegaConf.create({
            **model_config,
            **pretraining_config_dict
        })

        self.model_for_classification = build_model(classification_config)
        self.model_for_pretraining = build_model(pretraining_config)
Example #14
    def initialize(self, ctx):
        self.manifest = ctx.manifest
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        serialized_file = self.manifest['model']['serializedFile']
        model_pt_path = os.path.join(model_dir, serialized_file)
        self.map_location = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(
            self.map_location + ":" + str(properties.get("gpu_id"))
            if torch.cuda.is_available()
            else self.map_location
        )

        # Read the CSV file that includes all of the dataset's labels, to build
        # the class/index mapping and match the model output to the dataset's
        # label count
        df = pd.read_csv('./charades_action_lables.csv')
        label_set = set()
        df['action_labels'] = df['action_labels'].str.replace('"', '')
        labels_initial = df['action_labels'].tolist()
        labels = []
        for sublist in labels_initial:
            new_sublist = ast.literal_eval(sublist)
            labels.append(new_sublist)
            for item in new_sublist:
                label_set.add(item)
        classes = sorted(list(label_set))
        self.class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.classes = classes
        self.labels = labels
        self.idx_to_class = classes
        config = OmegaConf.load('config.yaml')
        print("*********** config keyssss **********", config.keys())
        setup_very_basic_config()
        setup_imports()
        self.model = MMFTransformer(config.model_config.mmf_transformer)
        self.model.build()
        self.model.init_losses()
        self.processor = build_processors(
            config.dataset_config["charades"].processors)
        # Load the weights from the full path assembled above
        state_dict = torch.load(model_pt_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()
        self.initialized = True
        print(
            "********* files in temp directory where the .mar file got extracted *********",
            os.listdir(model_dir))
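
Example #14 follows the TorchServe handler protocol: initialize(ctx) receives a context carrying the manifest and system properties. A hedged sketch of driving it manually in a test, with a stand-in context object (the handler class name here is hypothetical):

from types import SimpleNamespace

ctx = SimpleNamespace(
    manifest={"model": {"serializedFile": "model.pt"}},
    system_properties={"model_dir": ".", "gpu_id": 0},
)
handler = MMFTransformerHandler()  # hypothetical class wrapping initialize()
handler.initialize(ctx)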
Example #15
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        model_name = "vilbert"
        args = test_utils.dummy_args(model=model_name)
        configuration = Configuration(args)
        config = configuration.get_config()
        self.vision_feature_size = 1024
        self.vision_target_size = 1279
        model_config = config.model_config[model_name]
        model_config["training_head_type"] = "pretraining"
        model_config["visual_embedding_dim"] = self.vision_feature_size
        model_config["v_feature_size"] = self.vision_feature_size
        model_config["v_target_size"] = self.vision_target_size
        model_config["dynamic_attention"] = False
        model_config.model = model_name

        model_config["training_head_type"] = "classification"
        model_config["num_labels"] = 2
        self.model_config = model_config
Example #16
    def setUp(self):
        test_utils.setup_proxy()
        setup_imports()
        self._image_modality_config = MMFTransformerModalityConfig(
            type="image",
            key="image",
            embedding_dim=256,
            position_dim=1,
            segment_id=0,
            encoder=ImageEncoderFactory.Config(type=ImageEncoderTypes.identity),
        )
        self._text_modality_config = MMFTransformerModalityConfig(
            type="text",
            key="text",
            embedding_dim=756,
            position_dim=128,
            segment_id=1,
            encoder=TextEncoderFactory.Config(type=TextEncoderTypes.identity),
        )
Example #17
    def setUp(self):
        setup_imports()
        torch.manual_seed(1234)
        config_path = os.path.join(
            get_mmf_root(),
            "..",
            "projects",
            "butd",
            "configs",
            "coco",
            "beam_search.yaml",
        )
        config_path = os.path.abspath(config_path)
        args = dummy_args(model="butd", dataset="coco")
        args.opts.append(f"config={config_path}")
        configuration = Configuration(args)
        configuration.config.datasets = "coco"
        configuration.freeze()
        self.config = configuration.config
        registry.register("config", self.config)
Example #18
    def setUp(self):
        setup_imports()
        torch.manual_seed(1234)
        config_path = os.path.join(
            get_mmf_root(),
            "..",
            "projects",
            "butd",
            "configs",
            "coco",
            "nucleus_sampling.yaml",
        )
        config_path = os.path.abspath(config_path)
        args = dummy_args(model="butd", dataset="coco")
        args.opts.append(f"config={config_path}")
        configuration = Configuration(args)
        configuration.config.datasets = "coco"
        configuration.config.model_config.butd.inference.params.sum_threshold = 0.5
        configuration.freeze()
        self.config = configuration.config
        registry.register("config", self.config)
Example #19
def run(predict=False):
    setup_imports()
    parser = flags.get_parser()
    args = parser.parse_args()
    print(args)
    configuration = Configuration(args)
    # Set runtime args which can be changed by MMF
    configuration.args = args
    config = configuration.get_config()
    config.start_rank = 0
    if config.distributed.init_method is None:
        infer_init_method(config)

    if config.distributed.init_method is not None:
        if torch.cuda.device_count() > 1 and not config.distributed.no_spawn:
            config.start_rank = config.distributed.rank
            config.distributed.rank = None
            torch.multiprocessing.spawn(
                fn=distributed_main,
                args=(configuration, predict),
                nprocs=torch.cuda.device_count(),
            )
        else:
            distributed_main(0, configuration, predict)
    elif config.distributed.world_size > 1:
        assert config.distributed.world_size <= torch.cuda.device_count()
        port = random.randint(10000, 20000)
        config.distributed.init_method = f"tcp://localhost:{port}"
        config.distributed.rank = None
        torch.multiprocessing.spawn(
            fn=distributed_main,
            args=(configuration, predict),
            nprocs=config.distributed.world_size,
        )
    else:
        config.device_id = 0
        main(configuration, predict=predict)
Example #20
        if ("writer" in cls.mapping["state"] and value == default
                and no_warning is False):
            cls.mapping["state"]["writer"].warning(
                "Key {} is not present in registry, returning default value "
                "of {}".format(original_name, default))
        return value

    @classmethod
    def unregister(cls, name):
        r"""Remove an item from registry with key 'name'

        Args:
            name: Key which needs to be removed.
        Usage::

            from mmf.common.registry import registry

            config = registry.unregister("config")
        """
        return cls.mapping["state"].pop(name, None)


registry = Registry()

# Only set up imports in the main process; this means the registry won't be
# fully populated in spawned child processes (such as dataloader processes),
# though it will still be instantiated. This prevents issues such as
# https://github.com/facebookresearch/mmf/issues/355
if __name__ == "__main__":
    setup_imports()
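
The closing comment explains why setup_imports() runs only in the main process; the usual consequence, sketched below with an illustrative worker function, is that a spawned process must call setup_imports() itself before any registry lookups:

import torch

from mmf.utils.env import setup_imports

def worker(device_id, configuration, predict):
    # A fresh child interpreter has an empty registry; repopulate it first.
    setup_imports()
    # ... build the trainer/model here ...

if __name__ == "__main__":
    torch.multiprocessing.spawn(fn=worker, args=(None, False), nprocs=2)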
Example #21
    def setUp(self):
        setup_imports()
        self._initial_modules = set(sys.modules)
        self._sanitize_registry()
Example #22
    def setUp(self):
        setup_imports()