def test_lr_scheduler_with_no_actual_scheduler_raises(tmpdir):
    """Test exception when lr_scheduler dict has no scheduler."""
    model = BoringModel()
    model.configure_optimizers = lambda: {"optimizer": optim.Adam(model.parameters()), "lr_scheduler": {}}
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match='The lr scheduler dict must have the key "scheduler"'):
        trainer.fit(model)
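# For contrast, a minimal well-formed return value for `configure_optimizers` (a sketch reusing the
# `optim` objects and BoringModel from these tests; the helper name is illustrative, not part of the
# suite): the "lr_scheduler" dict must at least contain the "scheduler" key.
def _valid_lr_scheduler_dict_sketch(model):
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1)},
    }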
def test_multiple_optimizer_config_dicts_with_extra_keys_warns(tmpdir):
    """Test exception when multiple optimizer configuration dicts have extra keys."""
    model = BoringModel()
    optimizer1 = optim.Adam(model.parameters(), lr=0.01)
    optimizer2 = optim.Adam(model.parameters(), lr=0.01)
    lr_scheduler_config_1 = {"scheduler": optim.lr_scheduler.StepLR(optimizer1, 1)}
    lr_scheduler_config_2 = {"scheduler": optim.lr_scheduler.StepLR(optimizer2, 1)}
    optim_conf = [
        {"optimizer": optimizer1, "lr_scheduler": lr_scheduler_config_1, "foo": 1, "bar": 2},
        {"optimizer": optimizer2, "lr_scheduler": lr_scheduler_config_2, "foo": 1, "bar": 2},
    ]
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the optimizer configuration: \{.+\}"):
        TrainerOptimizersMixin._configure_optimizers(optim_conf)
def test_onecyclelr_with_epoch_interval_warns():
    """Test warning when a OneCycleLR is used and interval is epoch."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    lr_scheduler = {"scheduler": optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=3)}
    with pytest.warns(RuntimeWarning, match="Are you sure you didn't mean 'interval': 'step'?"):
        TrainerOptimizersMixin._configure_schedulers([lr_scheduler], None, False)
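# Sketch of the configuration the warning above nudges toward: OneCycleLR is meant to be stepped
# every optimizer step, so the dict sets "interval": "step" explicitly. Helper name is illustrative.
def _onecycle_step_interval_sketch(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.01, total_steps=3),
            "interval": "step",
        },
    }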
def test_reducelronplateau_with_no_monitor_raises(tmpdir):
    """Test exception when a ReduceLROnPlateau is used with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: ([optimizer], [optim.lr_scheduler.ReduceLROnPlateau(optimizer)])
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(
        MisconfigurationException, match="`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`"
    ):
        trainer.fit(model)
def test_reducelronplateau_with_no_monitor_in_lr_scheduler_dict_raises(tmpdir):
    """Test exception when lr_scheduler dict has a ReduceLROnPlateau with no monitor."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer)},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="must include a monitor when a `ReduceLROnPlateau`"):
        trainer.fit(model)
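# Sketch of the configuration the two ReduceLROnPlateau tests above expect instead: the lr_scheduler
# dict carries a "monitor" key naming a metric that the LightningModule actually logs (the metric
# name below is illustrative; the helper name is not part of the suite).
def _reduce_on_plateau_with_monitor_sketch(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": optim.lr_scheduler.ReduceLROnPlateau(optimizer),
            "monitor": "train_loss",
        },
    }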
def test_lr_scheduler_with_extra_keys_warns(tmpdir):
    """Test warning when lr_scheduler dict has extra keys."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "foo": 1, "bar": 2},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the lr scheduler dict: \[.+\]"):
        trainer.fit(model)
def test_lr_scheduler_with_unknown_interval_raises(tmpdir):
    """Test exception when lr_scheduler dict has unknown interval param value."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1), "interval": "incorrect_unknown_value"},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match=r'The "interval" key in lr scheduler dict must be'):
        trainer.fit(model)
# NOTE: the decorators below are reconstructed from the parameter names and are an assumption:
# `step_mock` suggests the scheduler's `step` is patched, `complete_epoch` suggests parametrization.
@mock.patch("torch.optim.lr_scheduler.ReduceLROnPlateau.step")
@pytest.mark.parametrize("complete_epoch", [True, False])
def test_lr_scheduler_strict(step_mock, tmpdir, complete_epoch):
    """Test "strict" support in lr_scheduler dict."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    max_epochs = 1 if complete_epoch else None
    max_steps = -1 if complete_epoch else 1
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=max_epochs, max_steps=max_steps)

    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": True},
    }

    if complete_epoch:
        with pytest.raises(
            MisconfigurationException,
            match=r"ReduceLROnPlateau conditioned on metric .* which is not available\. Available metrics are:",
        ):
            trainer.fit(model)
    else:
        trainer.fit(model)

    step_mock.assert_not_called()

    model.configure_optimizers = lambda: {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "giraffe", "strict": False},
    }

    if complete_epoch:
        trainer = Trainer(default_root_dir=tmpdir, max_epochs=max_epochs, max_steps=max_steps)
        with pytest.warns(
            RuntimeWarning, match=r"ReduceLROnPlateau conditioned on metric .* which is not available but strict"
        ):
            trainer.fit(model)

    step_mock.assert_not_called()
def test_optimizer_config_dict_with_extra_keys_warns(tmpdir):
    """Test exception when optimizer configuration dict has extra keys."""
    model = BoringModel()
    optimizer = optim.Adam(model.parameters())
    optim_conf = {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1)},
        "foo": 1,
        "bar": 2,
    }
    with pytest.warns(RuntimeWarning, match=r"Found unsupported keys in the optimizer configuration: \{.+\}"):
        TrainerOptimizersMixin._configure_optimizers(optim_conf)
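# The tests in this collection exercise "optimizer", "lr_scheduler" and "frequency" as supported
# top-level keys of an optimizer configuration dict (see test_optimizer_return_options below);
# a clean configuration along those lines, as a sketch with an illustrative helper name:
def _clean_optimizer_config_sketch(model):
    optimizer = optim.Adam(model.parameters())
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": optim.lr_scheduler.StepLR(optimizer, 1)},
        "frequency": 1,
    }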
def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
    """Test to ensure that checkpoint is saved correctly when using multiple GPUs."""
    model = BoringModel()
    trainer = Trainer(gpus=2, strategy="ddp_sharded_spawn", fast_dev_run=True)

    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, "model.pt")
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param.to("cpu"), shard_param)
def test_async_algorithm(tmpdir):
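    """Test training a BoringModel with the Bagua 'async' algorithm on two GPUs."""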
    model = BoringModel()
    bagua_strategy = BaguaStrategy(algorithm="async")
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=1,
        strategy=bagua_strategy,
        accelerator="gpu",
        devices=2,
    )
    trainer.fit(model)

    for param in model.parameters():
        assert torch.norm(param) < 3
def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
    """
    Test to ensure that checkpoint is saved correctly
    """
    model = BoringModel()
    trainer = Trainer(accelerator="ddp_sharded_spawn", num_processes=2, fast_dev_run=True)

    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, "model.pt")
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param.to("cpu"), shard_param)
def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
    """
        Test to ensure that checkpoint is saved correctly when using multiple GPUs
    """
    model = BoringModel()
    trainer = Trainer(
        gpus=2,
        accelerator='ddp_spawn',
        plugins=[DDPShardedPlugin()],
        fast_dev_run=True,
    )

    trainer.fit(model)

    checkpoint_path = os.path.join(tmpdir, 'model.pt')
    trainer.save_checkpoint(checkpoint_path)
    saved_model = BoringModel.load_from_checkpoint(checkpoint_path)

    # Assert model parameters are identical after loading
    for ddp_param, shard_param in zip(model.parameters(), saved_model.parameters()):
        assert torch.equal(ddp_param, shard_param)
def test_optimizer_return_options(tmpdir):
    trainer = Trainer(default_root_dir=tmpdir)
    model = BoringModel()

    # optimizers and schedulers shared by the cases below
    opt_a = optim.Adam(model.parameters(), lr=0.002)
    opt_b = optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = optim.lr_scheduler.StepLR(opt_b, 10)

    # single optimizer
    model.configure_optimizers = lambda: opt_a
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == 1 and len(lr_sched) == len(freq) == 0

    # opt tuple
    model.configure_optimizers = lambda: (opt_a, opt_b)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0

    # opt list
    model.configure_optimizers = lambda: [opt_a, opt_b]
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert opt == [opt_a, opt_b]
    assert len(lr_sched) == len(freq) == 0

    ref_lr_sched = dict(
        scheduler=scheduler_a,
        interval="epoch",
        frequency=1,
        reduce_on_plateau=False,
        monitor=None,
        strict=True,
        name=None,
        opt_idx=None,
    )

    # opt tuple of 2 lists
    model.configure_optimizers = lambda: ([opt_a], [scheduler_a])
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt tuple of 1 list
    model.configure_optimizers = lambda: ([opt_a], scheduler_a)
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt single dictionary
    model.configure_optimizers = lambda: {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == 1
    assert len(freq) == 0
    assert opt[0] == opt_a
    assert lr_sched[0] == ref_lr_sched

    # opt multiple dictionaries with frequencies
    model.configure_optimizers = lambda: (
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    )
    opt, lr_sched, freq = trainer.init_optimizers(model)
    assert len(opt) == len(lr_sched) == len(freq) == 2
    assert opt[0] == opt_a
    ref_lr_sched["opt_idx"] = 0
    assert lr_sched[0] == ref_lr_sched
    ref_lr_sched["scheduler"] = scheduler_b
    ref_lr_sched["opt_idx"] = 1
    assert lr_sched[1] == ref_lr_sched
    assert freq == [1, 5]
def test_v1_8_0_deprecate_trainer_callback_hook_mixin():
    methods_with_self = [
        "on_before_accelerator_backend_setup",
        "on_configure_sharded_model",
        "on_init_start",
        "on_init_end",
        "on_fit_start",
        "on_fit_end",
        "on_sanity_check_start",
        "on_sanity_check_end",
        "on_train_epoch_start",
        "on_train_epoch_end",
        "on_validation_epoch_start",
        "on_validation_epoch_end",
        "on_test_epoch_start",
        "on_test_epoch_end",
        "on_predict_epoch_start",
        "on_epoch_start",
        "on_epoch_end",
        "on_train_start",
        "on_train_end",
        "on_pretrain_routine_start",
        "on_pretrain_routine_end",
        "on_batch_start",
        "on_batch_end",
        "on_validation_start",
        "on_validation_end",
        "on_test_start",
        "on_test_end",
        "on_predict_start",
        "on_predict_end",
        "on_after_backward",
    ]
    methods_with_stage = [
        "setup",
        "teardown",
    ]
    methods_with_batch_batch_idx_dataloader_idx = [
        "on_train_batch_start",
        "on_validation_batch_start",
        "on_test_batch_start",
        "on_predict_batch_start",
    ]
    methods_with_outputs_batch_batch_idx_dataloader_idx = [
        "on_train_batch_end",
        "on_validation_batch_end",
        "on_test_batch_end",
        "on_predict_batch_end",
    ]
    methods_with_checkpoint = ["on_save_checkpoint", "on_load_checkpoint"]
    trainer = Trainer(
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
        enable_progress_bar=False,
        logger=False,
    )
    model = BoringModel()
    # need to attach model to trainer for testing of `on_pretrain_routine_start`
    trainer.fit(model)
    for method_name in methods_with_self:
        fn = getattr(trainer, method_name, None)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn()
    for method_name in methods_with_stage:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(stage="test")
    for method_name in methods_with_batch_batch_idx_dataloader_idx:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(batch={}, batch_idx=0, dataloader_idx=0)
    for method_name in methods_with_outputs_batch_batch_idx_dataloader_idx:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(outputs=torch.tensor([[1.0, -1.0], [1.0, -1.0]]), batch={}, batch_idx=0, dataloader_idx=0)
    for method_name in methods_with_checkpoint:
        fn = getattr(trainer, method_name)
        with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
            fn(checkpoint={})
    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_predict_epoch_end(outputs=torch.tensor([[1.0, -1.0], [1.0, -1.0]]))
    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_exception(exception=Exception)
    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_backward(loss=torch.tensor([[1.0, -1.0], [1.0, -1.0]]))
    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_optimizer_step(
            optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.9), optimizer_idx=0
        )
    with pytest.deprecated_call(match="was deprecated in v1.6 and will be removed in v1.8"):
        trainer.on_before_zero_grad(optimizer=optim.SGD(model.parameters(), lr=0.01, momentum=0.9))