Example #1
    def __init__(self,
                 num_train_data,
                 max_updates,
                 max_epochs,
                 device="cuda",
                 fp16_model=False):
        config = get_config_with_defaults({
            "training": {
                "max_updates": max_updates,
                "max_epochs": max_epochs,
                "evaluation_interval": 10000,
                "fp16": True,
            },
            "run_type": "train",
        })
        super().__init__(num_train_data, config=config)
        if fp16_model:
            assert (torch.cuda.is_available()
                    ), "MMFTrainerMock fp16 requires cuda enabled"
            model = SimpleModelWithFp16Assert({"in_dim": 1})
            model.build()
            model = model.cuda()
        else:
            model = SimpleModel({"in_dim": 1})
            model.build()
            model.train()
            model.to(self.device)

        self.model = model
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-3)
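
A minimal construction sketch for the mock above. The class name MMFTrainerMock is inferred from the assertion message in the fp16 branch, so treat this as an assumed usage rather than the documented API:

# Hypothetical usage of the constructor above; the class name comes from the
# fp16 assertion message.
trainer = MMFTrainerMock(
    num_train_data=100,   # size of the synthetic training set
    max_updates=2,        # stop after two optimizer updates
    max_epochs=None,
    fp16_model=False,     # set True only when CUDA is available
)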
Example #2
    def _train_with_condition(
        self,
        num_train_data,
        max_updates,
        max_epochs,
        update_frequency,
        batch_size,
        on_update_end_fn=None,
    ):
        torch.random.manual_seed(2)
        model = SimpleModel({"in_dim": 1})
        model.build()
        opt = torch.optim.SGD(model.parameters(), lr=0.01)
        trainer = TrainerTrainingLoopMock(
            num_train_data,
            max_updates,
            max_epochs,
            optimizer=opt,
            update_frequency=update_frequency,
            batch_size=batch_size,
            on_update_end_fn=on_update_end_fn,
        )
        model.to(trainer.device)
        trainer.model = model
        trainer.training_loop()
        return trainer
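
A hedged sketch of how a test method might call the helper above. The method name is hypothetical, and the num_updates attribute checked at the end is an assumption about the mock trainer, not something shown in this snippet:

    def test_update_frequency(self):
        # Hypothetical test method; the keyword values mirror the other examples.
        trainer = self._train_with_condition(
            num_train_data=100,
            max_updates=2,
            max_epochs=None,
            update_frequency=1,
            batch_size=1,
        )
        # num_updates is an assumed attribute of the returned mock trainer.
        self.assertEqual(trainer.num_updates, 2)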
Example #3
    def __init__(self, num_train_data, max_updates, max_epochs):
        self.training_config = OmegaConf.create({
            "detect_anomaly": False,
            "evaluation_interval": 10000
        })
        if max_updates is not None:
            self.training_config["max_updates"] = max_updates
        if max_epochs is not None:
            self.training_config["max_epochs"] = max_epochs

        self.model = SimpleModel(1)
        if torch.cuda.is_available():
            self.model = self.model.cuda()

        self.dataset_loader = MagicMock()
        self.dataset_loader.seed_sampler = MagicMock(return_value=None)
        self.dataset_loader.prepare_batch = lambda x: SampleList(x)
        self.optimizer = MagicMock()
        self.optimizer.step = MagicMock(return_value=None)
        self.optimizer.zero_grad = MagicMock(return_value=None)
        dataset = NumbersDataset(num_train_data)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            drop_last=False,
        )
        self.on_batch_start = MagicMock(return_value=None)
        self.logistics_callback = MagicMock(return_value=None)
        self.logistics_callback.log_interval = MagicMock(return_value=None)
        self.on_batch_end = MagicMock(return_value=None)
        self.meter = MagicMock(return_value=None)
        self.after_training_loop = MagicMock(return_value=None)
Example #4
def get_mmf_trainer(
    model_size=1,
    num_data_size=100,
    max_updates=5,
    max_epochs=None,
    on_update_end_fn=None,
    fp16=False,
    scheduler_config=None,
    grad_clipping_config=None,
):
    torch.random.manual_seed(2)
    model = SimpleModel(model_size)
    model.train()
    trainer_config = get_trainer_config()
    optimizer = build_optimizer(model, trainer_config)
    trainer = TrainerTrainingLoopMock(
        num_data_size,
        max_updates,
        max_epochs,
        config=trainer_config,
        optimizer=optimizer,
        on_update_end_fn=on_update_end_fn,
        fp16=fp16,
        scheduler_config=scheduler_config,
        grad_clipping_config=grad_clipping_config,
    )
    model.to(trainer.device)
    trainer.model = model
    return trainer
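
A small usage sketch, assuming get_mmf_trainer above is importable from the test helpers and that the mock builds its train loader in its constructor (as in Example #11). The callback-counting pattern is hypothetical and accepts **kwargs because the loop's exact call signature is not shown here:

updates_seen = []

def on_update_end(**kwargs):
    # Hypothetical callback used only to count completed updates.
    updates_seen.append(1)

trainer = get_mmf_trainer(max_updates=5, max_epochs=None,
                          on_update_end_fn=on_update_end)
trainer.training_loop()  # training_loop() drives the mock in the other examples
assert len(updates_seen) == 5  # assumed: one callback per update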
Example #5
    def test_batch_size_per_device(self, a):
        # Patch mmf.utils.general's get_world_size, not mmf.utils.distributed's,
        # since the former is the one actually used here
        with patch("mmf.utils.general.get_world_size", return_value=2):
            config = self._get_config(max_updates=2,
                                      max_epochs=None,
                                      batch_size=4)
            trainer = TrainerTrainingLoopMock(config=config)
            add_model(trainer, SimpleModel({"in_dim": 1}))
            add_optimizer(trainer, config)
            registry.register("config", trainer.config)
            batch_size = get_batch_size()
            trainer.config.training.batch_size = batch_size
            trainer.load_datasets()
            # The train loader uses the per-device batch size: for a global batch
            # size of 4 with world size 2, the per-device batch size is 4 // 2 = 2
            self.assertEqual(trainer.train_loader.current_loader.batch_size, 2)
            # batch_size_per_device is already per device, so it should stay the same
            config = self._get_config(max_updates=2,
                                      max_epochs=None,
                                      batch_size_per_device=4)
            trainer = TrainerTrainingLoopMock(config=config)
            add_model(trainer, SimpleModel({"in_dim": 1}))
            add_optimizer(trainer, config)
            registry.register("config", trainer.config)
            batch_size = get_batch_size()
            trainer.config.training.batch_size = batch_size
            trainer.load_datasets()
            self.assertEqual(trainer.train_loader.current_loader.batch_size, 4)

        max_updates = trainer._calculate_max_updates()
        self.assertEqual(max_updates, 2)

        self.check_values(trainer, 0, 0, 0)
        trainer.training_loop()
        self.check_values(trainer, 2, 1, 2)
Example #6
def get_mmf_trainer(config=None,
                    model_size=1,
                    num_data_size=100,
                    load_model_from_config=False,
                    seed=2):
    torch.random.manual_seed(seed)
    trainer = TrainerTrainingLoopMock(num_data_size, config=config)

    if not load_model_from_config:
        add_model(trainer, SimpleModel({"in_dim": model_size}))
    else:
        trainer.load_model()

    add_optimizer(trainer, config)

    trainer.load_datasets()
    return trainer
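
A hedged sketch combining this variant with the get_config_with_defaults helper seen in Example #1; the config keys mirror that example and the training_loop() call is the assumed entry point used in Example #2:

# Hypothetical config; key names beyond "training" / "run_type" are assumptions.
config = get_config_with_defaults({
    "training": {"max_updates": 2, "max_epochs": None},
    "run_type": "train",
})
trainer = get_mmf_trainer(config=config, model_size=1, num_data_size=100)
trainer.training_loop()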
Example #7
    def __init__(
        self,
        config,
        num_train_data,
        max_updates,
        max_epochs,
        device="cuda",
        fp16_model=False,
    ):
        super().__init__(num_train_data, max_updates, max_epochs, fp16=True)
        self.device = torch.device(device)
        self.config = config
        self.model = SimpleModel(1)
        self.model = self.model.cuda()
        self.optimizer = build_optimizer(self.model, self.config)
        self.distributed = True
        self.local_rank = 0
        self.parallelize_model()
        self.load_fp16_scaler()
Example #8
    def setUp(self):
        self.trainer = argparse.Namespace()
        self.config = load_yaml(os.path.join("configs", "defaults.yaml"))
        self.config = OmegaConf.merge(
            self.config,
            {
                "model": "simple",
                "model_config": {},
                "training": {
                    "lr_scheduler": True,
                    "lr_ratio": 0.1,
                    "lr_steps": [1, 2],
                    "use_warmup": False,
                    "callbacks": [{
                        "type": "test_callback",
                        "params": {}
                    }],
                },
            },
        )
        # Keep original copy for testing purposes
        self.trainer.config = deepcopy(self.config)
        registry.register("config", self.trainer.config)

        model = SimpleModel(SimpleModel.Config())
        model.build()
        self.trainer.model = model
        self.trainer.val_loader = torch.utils.data.DataLoader(
            NumbersDataset(2), batch_size=self.config.training.batch_size)

        self.trainer.optimizer = torch.optim.Adam(
            self.trainer.model.parameters(), lr=1e-01)
        self.trainer.lr_scheduler_callback = LRSchedulerCallback(
            self.config, self.trainer)

        self.trainer.callbacks = []
        for callback in self.config.training.get("callbacks", []):
            callback_type = callback.type
            callback_param = callback.params
            callback_cls = registry.get_callback_class(callback_type)
            self.trainer.callbacks.append(
                callback_cls(self.trainer.config, self.trainer,
                             **callback_param))
Example #9
    def __init__(self,
                 config,
                 num_train_data,
                 max_updates,
                 max_epochs,
                 device="cuda"):
        config.training.max_updates = max_updates
        config.training.max_epochs = max_epochs
        config.training.fp16 = True
        config = get_config_with_defaults(config)

        super().__init__(num_train_data, config=config)
        self.device = torch.device(device)
        self.config = config
        self.model = SimpleModel({"in_dim": 1})
        self.model.build()
        self.model = self.model.cuda()
        self.optimizer = build_optimizer(self.model, self.config)
        self.distributed = True
        self.local_rank = 0
        self.parallelize_model()
        self.load_fp16_scaler()
Example #10
def get_mmf_trainer(
    model_size=1,
    num_data_size=100,
    max_updates=5,
    max_epochs=None,
    on_update_end_fn=None,
    fp16=False,
    scheduler_config=None,
    grad_clipping_config=None,
    evaluation_interval=4,
    log_interval=1,
    batch_size=1,
    tensorboard=False,
):
    torch.random.manual_seed(2)
    model = SimpleModel({"in_dim": model_size})
    model.build()
    model.train()
    trainer_config = get_trainer_config()
    trainer_config.training.evaluation_interval = evaluation_interval
    trainer_config.training.log_interval = log_interval
    optimizer = build_optimizer(model, trainer_config)
    trainer = TrainerTrainingLoopMock(
        num_data_size,
        max_updates,
        max_epochs,
        config=trainer_config,
        optimizer=optimizer,
        fp16=fp16,
        on_update_end_fn=on_update_end_fn,
        scheduler_config=scheduler_config,
        grad_clipping_config=grad_clipping_config,
        batch_size=batch_size,
        tensorboard=tensorboard,
    )
    trainer.load_datasets()
    model.to(trainer.device)
    trainer.model = model
    return trainer
Example #11
    def __init__(
        self,
        num_train_data,
        max_updates,
        max_epochs,
        config=None,
        optimizer=None,
        update_frequency=1,
        batch_size=1,
        batch_size_per_device=None,
        fp16=False,
        on_update_end_fn=None,
        scheduler_config=None,
        grad_clipping_config=None,
    ):
        if config is None:
            self.config = OmegaConf.create(
                {
                    "training": {
                        "detect_anomaly": False,
                        "evaluation_interval": 10000,
                        "update_frequency": update_frequency,
                        "fp16": fp16,
                        "batch_size": batch_size,
                        "batch_size_per_device": batch_size_per_device,
                    }
                }
            )
            self.training_config = self.config.training
        else:
            self.training_config = config.training
            self.config = config

        # Resolve the batch size with the custom config registered, then restore the original
        original_config = registry.get("config")
        registry.register("config", self.config)
        batch_size = get_batch_size()
        registry.register("config", original_config)

        if max_updates is not None:
            self.training_config["max_updates"] = max_updates
        if max_epochs is not None:
            self.training_config["max_epochs"] = max_epochs

        self.model = SimpleModel({"in_dim": 1})
        self.model.build()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.distributed = False

        self.dataset_loader = MagicMock()
        self.dataset_loader.seed_sampler = MagicMock(return_value=None)
        self.dataset_loader.prepare_batch = lambda x: SampleList(x)
        if optimizer is None:
            self.optimizer = MagicMock()
            self.optimizer.step = MagicMock(return_value=None)
            self.optimizer.zero_grad = MagicMock(return_value=None)
        else:
            self.optimizer = optimizer

        if scheduler_config:
            config.training.lr_scheduler = True
            config.scheduler = scheduler_config
            self.lr_scheduler_callback = LRSchedulerCallback(config, self)
            self.callbacks.append(self.lr_scheduler_callback)
            on_update_end_fn = (
                on_update_end_fn
                if on_update_end_fn
                else self.lr_scheduler_callback.on_update_end
            )

        if grad_clipping_config:
            self.training_config.clip_gradients = True
            self.training_config.max_grad_l2_norm = grad_clipping_config[
                "max_grad_l2_norm"
            ]
            self.training_config.clip_norm_mode = grad_clipping_config["clip_norm_mode"]

        dataset = NumbersDataset(num_train_data)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            drop_last=False,
        )
        self.train_loader.current_dataset = dataset
        self.on_batch_start = MagicMock(return_value=None)
        self.on_update_start = MagicMock(return_value=None)
        self.logistics_callback = MagicMock(return_value=None)
        self.logistics_callback.log_interval = MagicMock(return_value=None)
        self.on_batch_end = MagicMock(return_value=None)
        self.on_update_end = (
            on_update_end_fn if on_update_end_fn else MagicMock(return_value=None)
        )
        self.meter = Meter()
        self.after_training_loop = MagicMock(return_value=None)
        self.on_validation_start = MagicMock(return_value=None)
        self.evaluation_loop = MagicMock(return_value=(None, None))
        self.scaler = torch.cuda.amp.GradScaler(enabled=False)
        self.val_loader = MagicMock(return_value=None)
        self.early_stop_callback = MagicMock(return_value=None)
        self.on_validation_end = MagicMock(return_value=None)
        self.metrics = MagicMock(return_value=None)
Example #12
    def test_build_optimizer_simple_model(self):
        model = SimpleModel({"in_dim": 1})
        model.build()
        optimizer = build_optimizer(model, self.config)
        self.assertTrue(isinstance(optimizer, torch.optim.Optimizer))
        self.assertEqual(len(optimizer.param_groups), 1)
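
For context, a minimal sketch of the kind of self.config this test could run against; the optimizer section layout (a type plus a params dict) and the "adam_w" name are assumptions about how build_optimizer reads its configuration:

from omegaconf import OmegaConf

# Hypothetical optimizer config; the key names and optimizer type are assumptions.
config = OmegaConf.create({
    "optimizer": {
        "type": "adam_w",
        "params": {"lr": 5e-5},
    }
})
optimizer = build_optimizer(model, config)  # model as built in the test above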
Example #13
    def __init__(
        self,
        num_train_data,
        max_updates,
        max_epochs,
        config=None,
        optimizer=None,
        update_frequency=1,
        batch_size=1,
        batch_size_per_device=None,
        fp16=False,
        on_update_end_fn=None,
        scheduler_config=None,
        grad_clipping_config=None,
        tensorboard=False,
    ):
        if config is None:
            self.config = OmegaConf.create({
                "training": {
                    "detect_anomaly": False,
                    "evaluation_interval": 10000,
                    "update_frequency": update_frequency,
                    "fp16": fp16,
                    "batch_size": batch_size,
                    "batch_size_per_device": batch_size_per_device,
                    "tensorboard": tensorboard,
                }
            })
            self.training_config = self.config.training
        else:
            config.training.batch_size = batch_size
            config.training.fp16 = fp16
            config.training.update_frequency = update_frequency
            config.training.tensorboard = tensorboard
            self.training_config = config.training
            self.config = config

        registry.register("config", self.config)

        if max_updates is not None:
            self.training_config["max_updates"] = max_updates
        if max_epochs is not None:
            self.training_config["max_epochs"] = max_epochs
        self.model = SimpleModel({"in_dim": 1})
        self.model.build()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.distributed = False

        if optimizer is None:
            self.optimizer = MagicMock()
            self.optimizer.step = MagicMock(return_value=None)
            self.optimizer.zero_grad = MagicMock(return_value=None)
        else:
            self.optimizer = optimizer

        if scheduler_config:
            config.training.lr_scheduler = True
            config.scheduler = scheduler_config
            self.lr_scheduler_callback = LRSchedulerCallback(config, self)
            self.callbacks.append(self.lr_scheduler_callback)
            on_update_end_fn = (on_update_end_fn if on_update_end_fn else
                                self.lr_scheduler_callback.on_update_end)
        if grad_clipping_config:
            self.training_config.clip_gradients = True
            self.training_config.max_grad_l2_norm = grad_clipping_config[
                "max_grad_l2_norm"]
            self.training_config.clip_norm_mode = grad_clipping_config[
                "clip_norm_mode"]

        self.on_batch_start = MagicMock(return_value=None)
        self.on_update_start = MagicMock(return_value=None)
        self.logistics_callback = MagicMock(return_value=None)
        self.logistics_callback.log_interval = MagicMock(return_value=None)
        self.on_batch_end = MagicMock(return_value=None)
        self.on_update_end = (on_update_end_fn if on_update_end_fn else
                              MagicMock(return_value=None))
        self.after_training_loop = MagicMock(return_value=None)
        self.on_validation_start = MagicMock(return_value=None)
        self.scaler = torch.cuda.amp.GradScaler(enabled=False)
        self.early_stop_callback = MagicMock(return_value=None)
        self.on_validation_end = MagicMock(return_value=None)
        self.metrics = MagicMock(return_value={})

        self.num_data = num_train_data
Example #14
    def __init__(
        self,
        num_train_data,
        max_updates,
        max_epochs,
        config=None,
        optimizer=None,
        update_frequency=1,
        batch_size=1,
        fp16=False,
        on_update_end_fn=None,
    ):
        if config is None:
            self.training_config = OmegaConf.create({
                "detect_anomaly": False,
                "evaluation_interval": 10000,
                "update_frequency": update_frequency,
                "fp16": fp16,
                "batch_size": batch_size,
            })
        else:
            self.training_config = config.training

        if max_updates is not None:
            self.training_config["max_updates"] = max_updates
        if max_epochs is not None:
            self.training_config["max_epochs"] = max_epochs

        self.model = SimpleModel(1)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.device = "cuda"
        else:
            self.device = "cpu"

        self.dataset_loader = MagicMock()
        self.dataset_loader.seed_sampler = MagicMock(return_value=None)
        self.dataset_loader.prepare_batch = lambda x: SampleList(x)
        if optimizer is None:
            self.optimizer = MagicMock()
            self.optimizer.step = MagicMock(return_value=None)
            self.optimizer.zero_grad = MagicMock(return_value=None)
        else:
            self.optimizer = optimizer

        dataset = NumbersDataset(num_train_data)
        self.train_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            drop_last=False,
        )
        self.train_loader.current_dataset = dataset
        self.on_batch_start = MagicMock(return_value=None)
        self.on_update_start = MagicMock(return_value=None)
        self.logistics_callback = MagicMock(return_value=None)
        self.logistics_callback.log_interval = MagicMock(return_value=None)
        self.on_batch_end = MagicMock(return_value=None)
        self.on_update_end = (on_update_end_fn if on_update_end_fn else
                              MagicMock(return_value=None))
        self.meter = Meter()
        self.after_training_loop = MagicMock(return_value=None)
        self.on_validation_start = MagicMock(return_value=None)
        self.evaluation_loop = MagicMock(return_value=(None, None))
        self.scaler = torch.cuda.amp.GradScaler(enabled=False)
        self.val_loader = MagicMock(return_value=None)
        self.early_stop_callback = MagicMock(return_value=None)
        self.on_validation_end = MagicMock(return_value=None)
        self.metrics = MagicMock(return_value=None)