# Example no. 1
def test_model_loader_find_best(tmpdir):
    """ModelLoader.find_best returns None without eligible checkpoints,
    otherwise the path of the best one for the monitored metric."""
    # an empty directory has nothing to find
    assert ModelLoader.find_best(tmpdir, "test") is None

    # checkpoints saved without a monitored metric are not eligible
    no_monitor_trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        checkpoint_callback=True,
        callbacks=[
            pl.callbacks.ModelCheckpoint(dirpath=tmpdir,
                                         save_top_k=-1,
                                         filename="{epoch}")
        ],
    )
    no_monitor_trainer.fit(DummyEngine(), datamodule=DummyMNIST())
    assert ModelLoader.find_best(tmpdir, "test") is None

    # checkpoints saved while monitoring a metric are found
    monitor = "bar"
    ckpt_cb = pl.callbacks.ModelCheckpoint(dirpath=tmpdir,
                                           mode="max",
                                           monitor=monitor,
                                           save_top_k=-1,
                                           filename="{epoch}")
    monitored_trainer = DummyTrainer(default_root_dir=tmpdir,
                                     callbacks=[ckpt_cb],
                                     checkpoint_callback=True,
                                     max_epochs=3)
    monitored_trainer.fit(DummyEngine(), datamodule=DummyMNIST())
    # "max" picks the last epoch, agreeing with the callback's own best path
    assert (ModelLoader.find_best(tmpdir, monitor, mode="max") ==
            tmpdir / "epoch=2-v0.ckpt" == ckpt_cb.best_model_path)
    # "min" picks the first epoch instead
    assert (ModelLoader.find_best(tmpdir, monitor,
                                  mode="min") == tmpdir / "epoch=0-v0.ckpt")
# Example no. 2
def test_cpu(tmpdir, num_processes):
    """Run a short CPU fit (ddp_cpu when num_processes > 1) and check that
    the TrainingTimer logged exactly one timing line per epoch."""
    log_filepath = tmpdir / "log"
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        callbacks=[TrainingTimer(), __TestCallback()],
        accelerator="ddp_cpu" if num_processes > 1 else None,
        num_processes=num_processes,
        plugins=[DummyLoggingPlugin(log_filepath)],
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST(batch_size=1))

    # caplog does not seem to work with multiprocessing.spawn,
    # so test logging on the saved log file instead
    if num_processes > 1:
        # non-zero ranks get their own log file, which must stay empty
        log_filepath_rank1 = tmpdir.join("log.rank1")
        assert log_filepath_rank1.exists()
        assert not log_filepath_rank1.read_text("utf-8")

    assert log_filepath.exists()
    lines = [l.strip() for l in log_filepath.readlines()]
    # exactly one "E<epoch>: tr_time=..." line is expected per epoch.
    # str.startswith accepts a tuple, so all epoch prefixes are tested in one
    # call per line (the prefixes are mutually exclusive, so each line can
    # match at most one of them).
    prefixes = tuple(f"E{e}: tr_time=" for e in range(trainer.max_epochs))
    assert sum(l.startswith(prefixes) for l in lines) == trainer.max_epochs
# Example no. 3
def test_epoch_csv_logger(tmpdir, num_processes):
    """EpochCSVLogger writes one row per epoch; a second run in the same
    directory gets a versioned "-v0" filename."""
    pl.seed_everything(0)

    # first run writes metrics.csv, the rerun writes metrics-v0.csv
    for csv_filename in ("metrics.csv", "metrics-v0.csv"):
        trainer = DummyTrainer(
            default_root_dir=tmpdir,
            max_epochs=3,
            accelerator="ddp_cpu" if num_processes > 1 else None,
            num_processes=num_processes,
            logger=EpochCSVLogger(tmpdir),
        )
        trainer.fit(DummyEngine(), datamodule=DummyMNIST(batch_size=2))

        csv = pd.read_csv(tmpdir / csv_filename)
        # "epoch" and the test variable "foo" both enumerate the epochs
        epochs = list(range(trainer.max_epochs))
        assert list(csv["epoch"].values) == epochs == list(csv["foo"].values)
        # the test variable "bar" is the batch counter sampled once per
        # epoch, i.e. an arithmetic progression stepping by the batch limit
        step = trainer.limit_train_batches
        expected_bar = list(range(step - 1, step * trainer.max_epochs, step))
        assert list(csv["bar"].values) == expected_bar
        # losses must have been written (and parsed back) as floats
        assert all(isinstance(v, float) for v in csv["tr_loss"].values)
# Example no. 4
def test_learning_rate_warns(tmpdir):
    """Fitting with the LearningRate callback must emit a RuntimeWarning."""
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        callbacks=[LearningRate()],
        max_epochs=1,
    )
    expected_warning = r"You are using LearningRateMonitor.*"
    with pytest.warns(RuntimeWarning, match=expected_warning):
        trainer.fit(DummyEngine(), datamodule=DummyMNIST())
# Example no. 5
def test_progress_bar(tmpdir):
    """Check the bar's batch counters, completion state, and the rendered
    bar strings for the train, validation and test stages."""
    bar = ProgressBar()
    engine = DummyEngine()
    data = DummyMNIST()
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        callbacks=[bar, __TestCallback(bar)],
    )

    # fake metrics so they appear in the bar's postfix
    trainer.progress_bar_metrics["tr_cer"] = 1
    trainer.progress_bar_metrics["va_cer"] = 0.33
    trainer.progress_bar_metrics["gpu_stats"] = {"gpu_stats": "baz"}

    trainer.fit(engine, datamodule=data)
    assert bar.is_enabled
    expected_train = trainer.limit_train_batches
    expected_val = trainer.limit_val_batches
    # batch totals match the trainer's limits
    assert bar.total_train_batches == bar.main_progress_bar.total == expected_train
    assert bar.total_val_batches == bar.val_progress_bar.total == expected_val
    # both bars ran to completion
    assert bar.main_progress_bar.n == bar.train_batch_idx == expected_train
    assert bar.val_progress_bar.n == bar.val_batch_idx == expected_val
    # the test bar must not exist during fit
    assert bar.total_test_batches == 0
    assert bar.test_progress_bar is None
    # the rendered strings match the expected tqdm layout
    float_pattern = "([0-9]*[.])?[0-9]+"
    pattern = (
        r" - E1: "
        r"100%\|[█]+\| 10/10 \[00:0[0-9]<00:00, "
        rf"{float_pattern}it/s, "
        rf"loss={float_pattern}, "
        rf"cer={float_pattern}%, "
        r"gpu_stats={'gpu_stats': 'baz'}]"
    )
    assert re.match("TR" + pattern, str(bar.main_progress_bar))
    assert re.match("VA" + pattern, str(bar.val_progress_bar))

    trainer.test(engine, datamodule=data)
    # repeat the previous checks for the test stage
    expected_test = trainer.limit_test_batches
    assert bar.total_test_batches == bar.test_progress_bar.total == expected_test
    assert bar.test_progress_bar.n == bar.test_batch_idx == expected_test
    assert re.match(
        rf"Decoding: 100%\|[█]+\| 10/10 \[00:00<00:00, {float_pattern}it/s]",
        str(bar.test_progress_bar),
    )
# Example no. 6
def test_model_loader_prepare_checkpoint(tmpdir):
    """prepare_checkpoint resolves empty values, absolute paths, bare
    filenames and globs to a concrete checkpoint path."""
    monitor = "bar"
    exp_dirpath = tmpdir / "experiment"
    # create a couple of monitored checkpoints to resolve against
    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        checkpoint_callback=True,
        callbacks=[
            pl.callbacks.ModelCheckpoint(
                dirpath=exp_dirpath,
                filename="{epoch}",
                monitor=monitor,
                mode="max",
                save_top_k=-1,
            )
        ],
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())

    expected = exp_dirpath / "epoch=0.ckpt"
    # an empty value still resolves to a checkpoint in the experiment dir
    assert ModelLoader.prepare_checkpoint("", exp_dirpath, monitor) == expected
    # an explicit path inside the experiment dir is kept as-is
    assert ModelLoader.prepare_checkpoint(expected, exp_dirpath,
                                          monitor) == expected
    # an explicit path outside the experiment dir is kept as-is too
    shutil.copy(expected, "/tmp")
    assert (ModelLoader.prepare_checkpoint("/tmp/epoch=0.ckpt", exp_dirpath,
                                           monitor) == "/tmp/epoch=0.ckpt")
    # a bare filename is resolved inside the experiment dir
    assert (ModelLoader.prepare_checkpoint("epoch=0.ckpt", exp_dirpath,
                                           monitor) == expected)
    # a glob pattern resolves to a matching checkpoint
    assert (ModelLoader.prepare_checkpoint(
        "epoch=?.ckpt", exp_dirpath, monitor) == exp_dirpath / "epoch=1.ckpt")
    # failure modes: wrong directory and unmatched pattern
    with pytest.raises(AssertionError,
                       match="Could not find a valid checkpoint in"):
        ModelLoader.prepare_checkpoint("", tmpdir, monitor)
    with pytest.raises(AssertionError, match="Could not find the checkpoint"):
        ModelLoader.prepare_checkpoint("?", exp_dirpath, monitor)
def test_progress_bar_gpu_stats(monkeypatch, tmpdir):
    """GPU stats collected by ProgressBarGPUStats end up, formatted,
    in the trainer's progress_bar_dict."""
    fake_stats = [[1.2, 2.3], [3.4, 4.5]]

    def _fake_on_train_start(self, *_):
        # pretend two GPU ids are configured
        self._gpu_ids = "0,1"

    # make shutil.which succeed and stub out the stat collection hooks
    monkeypatch.setattr(shutil, "which", lambda _: True)
    monkeypatch.setattr(ProgressBarGPUStats, "on_train_start",
                        _fake_on_train_start)
    monkeypatch.setattr(ProgressBarGPUStats, "_get_gpu_stats",
                        lambda *_: fake_stats)

    trainer = DummyTrainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        callbacks=[ProgressBarGPUStats()],
    )
    trainer.fit(DummyEngine(), datamodule=DummyMNIST())

    # each GPU is reported as "<first>/<first+second>MB", truncated to int
    expected = {
        f"GPU-{i}": f"{int(used)}/{int(used + free)}MB"
        for i, (used, free) in enumerate(fake_stats)
    }
    assert trainer.progress_bar_dict["gpu_stats"] == expected