def test_trainingstep_dict(tmpdir):
    """
    Tests that only training_step can be used

    Fits a ``DeterministicModel`` whose ``training_step`` is the dict-returning
    variant (validation dataloader disabled), then re-runs a single training
    batch and checks the logged and progress-bar metrics of that batch.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert not model.training_epoch_end_called

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx)
    assert out.signal == 0
    assert out.batch_log_metrics['log_acc1'] == 12.0
    assert out.batch_log_metrics['log_acc2'] == 7.0

    pbar_metrics = out.training_step_output_for_epoch_end['pbar_on_batch_end']
    assert pbar_metrics['pbar_acc1'] == 17.0
    assert pbar_metrics['pbar_acc2'] == 19.0
# Example #2
# 0
def training_step_with_step_end(tmpdir):
    """
    Checks train_step + training_step_end

    Fits a ``DeterministicModel`` wired with the ``*_for_step_end_dict``
    training step and the dict-returning ``training_step_end`` (validation
    dataloader disabled), then replays one batch and checks the logged and
    progress-bar metrics.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    collection rules will not pick this function up -- confirm that is intended.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_for_step_end_dict
    model.training_step_end = model.training_step_end_dict
    model.val_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert model.training_step_end_called
    assert not model.training_epoch_end_called

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0
    assert out.batch_log_metrics['log_acc1'] == 14.0
    assert out.batch_log_metrics['log_acc2'] == 9.0

    train_step_end_out = out.training_step_output_for_epoch_end
    pbar_metrics = train_step_end_out['progress_bar']
    assert 'train_step_end' in train_step_end_out
    assert pbar_metrics['pbar_acc1'] == 19.0
    assert pbar_metrics['pbar_acc2'] == 21.0
def training_step_with_step_end(tmpdir):
    """
    Checks train_step + training_step_end

    Fits a ``DeterministicModel`` with the ``*_for_step_end_dict`` training
    step and the dict-returning ``training_step_end`` (validation dataloader
    disabled), then replays one batch. In this variant the batch result is
    unpacked as a 4-tuple.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    collection rules will not pick this function up -- confirm that is intended.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_for_step_end_dict
    model.training_step_end = model.training_step_end_dict
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert model.training_step_end_called
    assert not model.training_epoch_end_called

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx)
    signal, grad_norm_dic, all_log_metrics, training_step_output_for_epoch_end = out
    assert signal == 0
    assert all_log_metrics['log_acc1'] == 12.0
    assert all_log_metrics['log_acc2'] == 7.0

    pbar_metrics = training_step_output_for_epoch_end['pbar_on_batch_end']
    assert pbar_metrics['pbar_acc1'] == 17.0
    assert pbar_metrics['pbar_acc2'] == 19.0
def test_validation_step_arbitrary_dict_return(tmpdir):
    """
    A validation step is allowed to return a free-form dict.

    With ``validation_step_end`` and ``validation_epoch_end`` set to ``None``,
    the test checks that each of the two evaluation results has
    ``'some' == 171`` and ``'value' == 'a'``.
    """
    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.validation_step = net.validation_step_arbitary_dict_return
    # neither optional validation hook is provided
    net.validation_step_end = None
    net.validation_epoch_end = None

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # first item: results of the full loop; second item: output of _evaluate
    callback_metrics, eval_results = trainer.run_evaluation(test_mode=False)
    assert len(callback_metrics) == 2
    assert len(eval_results) == 2

    # every step result must carry both expected entries
    for key, expected in (('some', 171), ('value', 'a')):
        for step_idx in (0, 1):
            assert eval_results[step_idx][key] == expected

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called
def test_validation_step_no_return(tmpdir):
    """
    A validation step that returns nothing must still be accepted.

    Both values returned by ``run_evaluation`` are expected to be empty.
    """
    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.validation_step = net.validation_step_no_return
    # neither optional validation hook is provided
    net.validation_step_end = None
    net.validation_epoch_end = None

    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, weights_summary=None)
    trainer.fit(net)

    # first item: results of the full loop; second item: output of _evaluate
    loop_out, step_results = trainer.run_evaluation(test_mode=False)
    assert len(loop_out) == 0
    assert len(step_results) == 0

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called
def test_train_step_epoch_end(tmpdir):
    """
    Checks train_step + training_epoch_end (NO training_step_end)

    Fits a ``DeterministicModel`` for one epoch with the dict-returning training
    step and ``training_epoch_end`` (validation dataloader disabled), checks the
    epoch-end metrics on the trainer, then replays one batch and checks its
    logged and progress-bar metrics.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.training_step_end = None
    model.training_epoch_end = model.training_epoch_end_dict
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert model.training_epoch_end_called

    # assert epoch end metrics were added
    assert trainer.callback_metrics['epoch_end_log_1'] == 178
    assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx)
    assert out.signal == 0
    assert out.batch_log_metrics['log_acc1'] == 12.0
    assert out.batch_log_metrics['log_acc2'] == 7.0

    train_step_end_out = out.training_step_output_for_epoch_end
    pbar_metrics = train_step_end_out['progress_bar']
    assert pbar_metrics['pbar_acc1'] == 17.0
    assert pbar_metrics['pbar_acc2'] == 19.0
def training_step_scalar_with_step_end(tmpdir):
    """
    Checks train_step with scalar only + training_step_end

    Fits a ``DeterministicModel`` with the scalar-returning training step and
    scalar ``training_step_end`` (validation dataloader disabled), then replays
    one batch through ``trainer.train_loop`` and checks that the step output and
    the closure loss are both 171.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    collection rules will not pick this function up -- confirm that is intended.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_scalar_return
    model.training_step_end = model.training_step_end_scalar
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert model.training_step_end_called
    assert not model.training_epoch_end_called

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0
    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)

    train_step_out = out.training_step_output_for_epoch_end
    assert len(train_step_out) == 1
    # the single entry is indexed twice to reach the step output itself
    train_step_out = train_step_out[0][0]
    assert isinstance(train_step_out, torch.Tensor)
    assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure_result = trainer.train_loop.training_step_and_backward(
        batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens)
    assert opt_closure_result['loss'].item() == 171
def test_no_callbacks_with_train_loop_only(tmpdir):
    """
    Train-loop-only run whose training step requests no callbacks.

    Expects one saved loss per batch per epoch, no early-stop callback on the
    trainer, three checkpoint-callback history entries and an empty
    early-stopping history.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_no_callbacks_result_obj
    net.training_epoch_end = None
    net.val_dataloader = None

    num_batches, num_epochs = 3, 3
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=num_epochs,
        log_every_n_steps=1,
        limit_train_batches=num_batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # one recorded loss per training batch
    recorded_losses = trainer.dev_debugger.saved_train_losses
    assert len(recorded_losses) == num_batches * num_epochs

    assert trainer.early_stop_callback is None

    debugger = trainer.dev_debugger
    assert len(debugger.checkpoint_callback_history) == 3
    assert len(debugger.early_stopping_history) == 0
def test_val_step_step_end_no_return(tmpdir):
    """
    Validation step + validation step end, where step end returns nothing.

    Both values returned by ``run_evaluation`` are expected to be empty.
    """
    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.validation_step = net.validation_step_dict_return
    net.validation_step_end = net.validation_step_end_no_return
    net.validation_epoch_end = None

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # first item: results of the full loop; second item: output of _evaluate
    loop_metrics, step_results = trainer.run_evaluation(test_mode=False)
    assert len(loop_metrics) == 0
    assert len(step_results) == 0

    # step and step-end ran, epoch-end did not
    assert net.validation_step_called
    assert net.validation_step_end_called
    assert not net.validation_epoch_end_called
def test_full_training_loop_dict(tmpdir):
    """
    Checks train_step + training_step_end + training_epoch_end

    Fits a ``DeterministicModel`` for one epoch with all three dict-returning
    training hooks (validation dataloader disabled), checks the epoch-end
    metrics on the trainer, then replays one batch. The batch result is
    unpacked as a 4-tuple.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_for_step_end_dict
    model.training_step_end = model.training_step_end_dict
    model.training_epoch_end = model.training_epoch_end_dict
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert model.training_step_end_called
    assert model.training_epoch_end_called

    # assert epoch end metrics were added
    assert trainer.callback_metrics['epoch_end_log_1'] == 178
    assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx)
    signal, grad_norm_dic, all_log_metrics, training_step_output_for_epoch_end = out
    assert signal == 0
    assert all_log_metrics['log_acc1'] == 12.0
    assert all_log_metrics['log_acc2'] == 7.0

    pbar_metrics = training_step_output_for_epoch_end['pbar_on_batch_end']
    assert pbar_metrics['pbar_acc1'] == 17.0
    assert pbar_metrics['pbar_acc2'] == 19.0
def test_validation_step_scalar_return(tmpdir):
    """
    A validation step is allowed to return a bare scalar.

    The full-loop result is expected to be empty, while both per-step results
    must equal 171.
    """
    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.validation_step = net.validation_step_scalar_return
    # neither optional validation hook is provided
    net.validation_step_end = None
    net.validation_epoch_end = None

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # first item: results of the full loop; second item: output of _evaluate
    loop_out, step_results = trainer.run_evaluation(test_mode=False)
    assert len(loop_out) == 0
    assert len(step_results) == 2
    assert step_results[0] == 171
    assert step_results[1] == 171

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called
def test_val_step_only_epoch_metrics(tmpdir):
    """
    Epoch-level (auto-reduced) validation metrics must land in the logger and
    progress-bar records they were requested for, once per epoch.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.validation_step = net.validation_step_result_only_epoch_metrics
    # every optional *_end hook is switched off
    for hook_name in (
        'training_step_end',
        'training_epoch_end',
        'validation_step_end',
        'validation_epoch_end',
    ):
        setattr(net, hook_name, None)

    batches = 3
    epochs = 3
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        row_log_interval=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping never fired
    assert len(debugger.early_stopping_history) == 0

    # exactly one logger record and one pbar record per epoch
    assert len(debugger.logged_metrics) == epochs
    assert len(debugger.pbar_added_metrics) == epochs

    # logger records: log-only and log+pbar metrics, nothing else
    for logged in debugger.logged_metrics:
        assert 'no_val_no_pbar' not in logged
        assert 'val_step_pbar_acc' not in logged
        assert logged['val_step_log_acc'] == (12 + 13) / 2
        assert logged['val_step_log_pbar_acc'] == (13 + 14) / 2

    # pbar records: log+pbar and pbar-only metrics, nothing else
    for shown in debugger.pbar_added_metrics:
        assert 'no_val_no_pbar' not in shown
        assert 'val_step_log_acc' not in shown
        assert shown['val_step_log_pbar_acc'] == (13 + 14) / 2
        assert shown['val_step_pbar_acc'] == (14 + 15) / 2

    # a single checkpoint is expected because the value never changes afterwards
    assert len(debugger.checkpoint_callback_history) == 1

    # last known checkpoint metric
    final_metrics = trainer.logger_connector.callback_metrics
    assert final_metrics['val_checkpoint_on'] == 171
# Example #13
# 0
def test_val_step_epoch_end_result(tmpdir):
    """
    Validation step + validation epoch end, both using the result-object variants.

    Checks the per-epoch logger and progress-bar records and the final
    ``checkpoint_on`` callback metric.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.validation_step = net.validation_step_for_epoch_end_result
    net.validation_epoch_end = net.validation_epoch_end_result
    # the remaining optional hooks are switched off
    for hook_name in ('training_step_end', 'training_epoch_end', 'validation_step_end'):
        setattr(net, hook_name, None)

    batches = 3
    epochs = 3
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    assert len(trainer.logger_connector.callback_metrics) == 6

    # step and epoch-end ran, step-end did not
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert net.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping never fired
    assert len(debugger.early_stopping_history) == 0

    # exactly one logger record and one pbar record per epoch
    assert len(debugger.logged_metrics) == epochs
    assert len(debugger.pbar_added_metrics) == epochs

    # logger records carry the epoch-end metric and the step metric
    for logged in debugger.logged_metrics:
        assert logged['val_epoch_end_metric'] == 189
        assert 'val_step_metric' in logged

    # so do the progress-bar records
    for shown in debugger.pbar_added_metrics:
        assert shown['val_epoch_end_metric'] == 189
        assert 'val_step_metric' in shown

    # a single checkpoint is expected because the value never changes afterwards
    assert len(debugger.checkpoint_callback_history) == 1

    # last known checkpoint metric
    final_metrics = trainer.logger_connector.callback_metrics
    assert final_metrics['checkpoint_on'] == 189
# Example #14
# 0
def test_val_step_result_callbacks(tmpdir):
    """
    Validation step alone (no other ``validation_*`` hooks) alongside a train
    loop, with the callbacks fed from the validation loop rather than the
    train loop.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.validation_step = net.validation_step_result_callbacks
    # every optional *_end hook is switched off
    for hook_name in (
        'training_step_end',
        'training_epoch_end',
        'validation_step_end',
        'validation_epoch_end',
    ):
        setattr(net, hook_name, None)

    batches = 3
    epochs = 300
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping must have fired after the requested number of steps;
    # had it been driven by the train step the count would not be 5
    assert len(debugger.early_stopping_history) == 5

    # two checkpoints and no more
    assert len(debugger.checkpoint_callback_history) == 2

    # last known checkpoint metric
    final_metrics = trainer.logger_connector.callback_metrics
    assert final_metrics['checkpoint_on'] == 171 + 15

    # nothing was requested for logging (apart from the current-epoch marker)
    assert len(trainer.logger_connector.progress_bar_metrics) == 0
    assert len(debugger.logged_metrics) == 0
def test_full_training_loop_scalar(tmpdir):
    """
    Checks train_step + training_step_end + training_epoch_end
    (all with scalar return from train_step)

    Fits a ``DeterministicModel`` for one epoch with the scalar variants of all
    three training hooks (validation dataloader disabled), then replays one
    batch and checks that the step output and the closure loss are both 171.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_scalar_return
    model.training_step_end = model.training_step_end_scalar
    model.training_epoch_end = model.training_epoch_end_scalar
    model.val_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert model.training_step_end_called
    assert model.training_epoch_end_called

    # assert epoch end metrics were added ('epoch' is the only callback metric)
    assert 'epoch' in trainer.callback_metrics and len(
        trainer.callback_metrics) == 1
    assert len(trainer.progress_bar_metrics) == 0

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx)
    assert out.signal == 0
    assert len(out.batch_log_metrics) == 0 and isinstance(
        out.batch_log_metrics, dict)
    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)

    train_step_out = out.training_step_output_for_epoch_end
    assert len(train_step_out) == 1
    # the single entry is indexed twice to reach the step output itself
    train_step_out = train_step_out[0][0]
    assert isinstance(train_step_out, torch.Tensor)
    assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0,
                                                   trainer.optimizers[0],
                                                   trainer.hiddens)
    assert opt_closure_result['loss'].item() == 171
# Example #16
# 0
def test_val_step_using_train_callbacks(tmpdir):
    """
    Early stopping conditioned in the train loop,
    checkpointing conditioned in the val loop.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.validation_step = net.validation_step_result_no_callbacks
    # every optional *_end hook is switched off
    for hook_name in (
        'training_step_end',
        'training_epoch_end',
        'validation_step_end',
        'validation_epoch_end',
    ):
        setattr(net, hook_name, None)

    batches = 3
    epochs = 300
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    expected_epochs = 10

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping was driven by the train loop, not the val loop
    assert len(debugger.early_stopping_history) == expected_epochs

    # two checkpoints and no more
    assert len(debugger.checkpoint_callback_history) == 2

    # last known checkpoint metric
    final_metrics = trainer.logger_connector.callback_metrics
    assert final_metrics['checkpoint_on'] == 171 + 20

    # nothing was requested for logging (apart from the current-epoch marker)
    assert len(trainer.logger_connector.progress_bar_metrics) == 0
    assert len(debugger.logged_metrics) == 0
def test_use_callbacks_with_train_loop_only(tmpdir):
    """
    Early stopping and checkpointing with only a train loop
    (validation dataloader disabled).

    Checks the early-stopping history, the per-batch-index loss counts and the
    epochs at which checkpoints were recorded.
    """
    from collections import Counter

    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.training_epoch_end = None
    net.val_dataloader = None

    batches = 3
    epochs = 300
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    num_expected_epochs = 10

    # --- early stopping -------------------------------------------------
    # with a train loop only, early stopping is evaluated every epoch
    es_history = trainer.dev_debugger.early_stopping_history
    assert len(es_history) == num_expected_epochs
    lowest_best = min(entry['best'] for entry in es_history)
    assert lowest_best == 171 + 9
    saved_losses = trainer.dev_debugger.saved_train_losses

    # each batch index must have been seen once per epoch
    seen_per_batch_idx = Counter(entry['batch_idx'] for entry in saved_losses)
    for idx, times_seen in seen_per_batch_idx.items():
        assert times_seen == num_expected_epochs
        assert idx in [0, 1, 2]

    # --- checkpointing --------------------------------------------------
    ckpt_history = trainer.dev_debugger.checkpoint_callback_history
    assert len(ckpt_history) == 5, '5 ckpts should have been saved'
    expected_ckpt_epochs = [0, 1, 2, 3, 6]
    for ckpt_entry, wanted_epoch in zip(ckpt_history, expected_ckpt_epochs):
        assert ckpt_entry['epoch'] == wanted_epoch
        assert ckpt_entry['monitor'] == 'checkpoint_on'
def test_training_step_dict(tmpdir):
    """
    Tests that only training_step can be used

    Fits a ``DeterministicModel`` with the dict-returning training step
    (validation dataloader disabled), replays one batch through
    ``trainer.train_loop`` and checks the logged metrics, the step output dict
    and the loss returned by the optimizer closure.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.val_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert not model.training_epoch_end_called

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)

    assert out.signal == 0
    assert trainer.logger_connector.logged_metrics['log_acc1'] == 12.0
    assert trainer.logger_connector.logged_metrics['log_acc2'] == 7.0

    train_step_out = out.training_step_output_for_epoch_end
    assert len(train_step_out) == 1

    # the single entry is indexed twice to reach the step output dict itself
    train_step_out = train_step_out[0][0]
    pbar_metrics = train_step_out['progress_bar']
    assert 'log' in train_step_out
    assert 'progress_bar' in train_step_out
    assert train_step_out['train_step_test'] == 549
    assert pbar_metrics['pbar_acc1'] == 17.0
    assert pbar_metrics['pbar_acc2'] == 19.0

    # make sure the optimizer closure returns the correct things
    opt_closure_result = trainer.train_loop.training_step_and_backward(
        batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens)
    assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3)
# Example #19
# 0
def test_val_step_only_step_metrics(tmpdir):
    """
    Step-level validation metrics must reach the logger and progress-bar
    records on every step when that is what was requested.
    """
    # turn on the internal dev debugger
    os.environ['PL_DEV_DEBUG'] = '1'

    net = DeterministicModel()
    net.training_step = net.training_step_result_log_epoch_and_step_for_callbacks
    net.validation_step = net.validation_step_result_only_step_metrics
    # every optional *_end hook is switched off
    for hook_name in (
        'training_step_end',
        'training_epoch_end',
        'validation_step_end',
        'validation_epoch_end',
    ):
        setattr(net, hook_name, None)

    batches = 3
    epochs = 3
    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # only validation_step itself should have run
    assert net.validation_step_called
    assert not net.validation_step_end_called
    assert not net.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping never fired
    assert len(debugger.early_stopping_history) == 0

    # one logger record per step; the pbar has one extra record per epoch
    total_steps = epochs * batches
    assert len(debugger.logged_metrics) == total_steps
    assert len(debugger.pbar_added_metrics) == total_steps + (epochs)

    # a single checkpoint is expected because the value never changes afterwards
    assert len(debugger.checkpoint_callback_history) == 1

    # last known checkpoint metric
    final_metrics = trainer.logger_connector.callback_metrics
    assert final_metrics['checkpoint_on'] == 189
# Example #20
# 0
def test_val_step_step_end(tmpdir):
    """
    Validation step combined with validation step end (no epoch end).

    Checks the full-loop callback metrics, the two per-step results and the
    number of callback-metric candidates on the trainer.
    """
    # internal dev debugger explicitly switched off for this test
    os.environ['PL_DEV_DEBUG'] = '0'

    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.validation_step = net.validation_step_dict_return
    # NOTE(review): self-assignment kept as-is; it looks like a no-op -- confirm intent
    net.validation_step_end = net.validation_step_end
    net.validation_epoch_end = None

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(net)

    # first item: results of the full loop; second item: output of _evaluate
    callback_metrics, eval_results = trainer.run_evaluation(test_mode=False)
    assert len(callback_metrics) == 1
    assert len(callback_metrics[0]) == 6

    first_metrics = callback_metrics[0]
    assert first_metrics['val_step_end'] == 1802
    assert len(eval_results) == 2
    assert eval_results[0]['log']['log_acc1'] == 12
    assert eval_results[1]['log']['log_acc1'] == 13

    # both step results expose the standard keys
    for key in ('val_loss', 'log', 'progress_bar'):
        assert key in eval_results[0]
        assert key in eval_results[1]

    # every key should have become a callback-metric candidate
    assert len(trainer.logger_connector.callback_metrics) in (8, 9)

    # step and step-end ran, epoch-end did not
    assert net.validation_step_called
    assert net.validation_step_end_called
    assert not net.validation_epoch_end_called
# Example #21
# 0
def test_result_obj_lr_scheduler_step(tmpdir):
    """
    test that the LR scheduler was called at the correct time with the correct metrics

    Fits a ``DeterministicModel`` for two epochs with the dict-returning
    training hooks and the ``lr_on_plateau_step`` optimizer configuration
    (validation dataloader disabled), then checks that the dev debugger
    recorded 8 LR-scheduler updates.
    """
    import os

    # Presumably the dev debugger only records history while PL_DEV_DEBUG is on
    # (TODO confirm); set it here so the assertion below does not depend on
    # whatever value a previously-run test left in the environment.
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_for_step_end_dict
    model.training_step_end = model.training_step_end_dict
    model.training_epoch_end = model.training_epoch_end_dict
    model.val_dataloader = None
    model.configure_optimizers = model.configure_optimizers__lr_on_plateau_step

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        weights_summary=None,
    )
    trainer.fit(model)

    assert len(trainer.dev_debugger.saved_lr_scheduler_updates) == 8
def test_train_step_epoch_end_scalar(tmpdir):
    """
    Checks train_step + training_epoch_end (NO training_step_end)
    (with scalar return)

    Fits a ``DeterministicModel`` for one epoch with the scalar training step
    and scalar ``training_epoch_end`` (validation dataloader disabled), then
    replays one batch and checks that the ``'minimize'`` entry of the step
    output and the closure loss are both 171.
    """
    os.environ['PL_DEV_DEBUG'] = '0'

    model = DeterministicModel()
    model.training_step = model.training_step_scalar_return
    model.training_step_end = None
    model.training_epoch_end = model.training_epoch_end_scalar
    model.val_dataloader = None

    # root the trainer in pytest's tmpdir so the run does not write into the cwd
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert model.training_epoch_end_called

    # assert epoch end metrics were added (none are expected for scalar returns)
    assert len(trainer.logger_connector.callback_metrics) == 0
    assert len(trainer.logger_connector.progress_bar_metrics) == 0

    # make sure training outputs what is expected:
    # take the first batch directly instead of looping and breaking
    batch_idx, batch = 0, next(iter(model.train_dataloader()))

    out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0
    assert len(out.grad_norm_dic) == 0 and isinstance(out.grad_norm_dic, dict)

    train_step_out = out.training_step_output_for_epoch_end
    assert len(train_step_out) == 1
    # the single entry is indexed twice to reach the step output itself
    train_step_out = train_step_out[0][0]
    assert isinstance(train_step_out['minimize'], torch.Tensor)
    assert train_step_out['minimize'].item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure_result = trainer.train_loop.training_step_and_backward(
        batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens)
    assert opt_closure_result['loss'].item() == 171
def test_train_step_epoch_end(tmpdir):
    """
    Training step + training epoch end, without a training step end.

    Checks the epoch-end metrics on the logger connector, then replays one
    batch and checks its logged and progress-bar metrics.
    """
    net = DeterministicModel()
    net.training_step = net.training_step_dict_return
    net.training_step_end = None
    net.training_epoch_end = net.training_epoch_end_dict
    net.val_dataloader = None

    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, weights_summary=None)
    trainer.fit(net)

    # step and epoch-end ran, step-end did not
    assert net.training_step_called
    assert not net.training_step_end_called
    assert net.training_epoch_end_called

    # epoch-end metrics must have been registered
    connector = trainer.logger_connector
    assert connector.callback_metrics['epoch_end_log_1'] == 178
    assert connector.progress_bar_metrics['epoch_end_pbar_1'] == 234

    # replay the first training batch
    first_batch = next(iter(net.train_dataloader()))
    first_idx = 0

    out = trainer.train_loop.run_training_batch(first_batch, first_idx, 0)
    assert out.signal == 0
    assert trainer.logger_connector.logged_metrics['log_acc1'] == 12.0
    assert trainer.logger_connector.logged_metrics['log_acc2'] == 7.0

    # outputs are for 1 optimizer and no tbptt
    epoch_end_inputs = out.training_step_output_for_epoch_end
    assert len(epoch_end_inputs) == 1
    step_output = epoch_end_inputs[0][0]

    pbar = step_output['progress_bar']
    assert pbar['pbar_acc1'] == 17.0
    assert pbar['pbar_acc2'] == 19.0
def test_full_val_loop(tmpdir):
    """
    Test that val step + val step end + val epoch end

    Fits a ``DeterministicModel`` with all three validation hooks in place,
    runs ``trainer.run_evaluation()`` once more, and checks the sizes and
    contents of what it returns as well as the hook "called" flags.
    """

    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    # NOTE(review): the next two lines assign each attribute to itself; kept
    # as-is since the intent cannot be confirmed from this test alone.
    model.validation_step_end = model.validation_step_end
    model.validation_epoch_end = model.validation_epoch_end

    trainer_options = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=3,
        num_sanity_val_steps=0,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_options)
    trainer.fit(model)

    # run_evaluation returns a pair: callback metrics and eval results
    callback_metrics, eval_results = trainer.run_evaluation()
    assert len(callback_metrics) == 1
    assert len(callback_metrics[0]) == 7
    assert len(eval_results) == 1

    result = eval_results[0]
    assert result['val_step_end'] == 1802
    assert result['val_epoch_end'] == 1233

    for expected_key in ('val_loss', 'log', 'progress_bar'):
        assert expected_key in result

    # ensure all the keys ended up as candidates for callbacks
    num_callback_candidates = len(trainer.logger_connector.callback_metrics)
    assert num_callback_candidates in (9, 10)

    # every validation hook must have been flagged as called
    assert model.validation_step_called
    assert model.validation_step_end_called
    assert model.validation_epoch_end_called
def test_validation_step_dict_return(tmpdir):
    """
    Test that val step can return a dict with all the expected keys and they end up
    in the correct place

    ``validation_step_end`` and ``validation_epoch_end`` are set to ``None``,
    so only ``validation_step`` is expected to be flagged as called.
    """

    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer_options = dict(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer = Trainer(**trainer_options)
    trainer.fit(model)

    # run_evaluation returns a pair: callback metrics and eval results
    callback_metrics, eval_results = trainer.run_evaluation()
    assert len(callback_metrics) == 1
    assert len(callback_metrics[0]) == 5
    assert len(eval_results) == 2

    # one result per validation batch, each with its own logged accuracy
    for position, expected_acc in enumerate((12, 13)):
        assert eval_results[position]['log']['log_acc1'] == expected_acc

    for expected_key in ('val_loss', 'log', 'progress_bar'):
        for position in (0, 1):
            assert expected_key in eval_results[position]

    # ensure all the keys ended up as candidates for callbacks
    num_callback_candidates = len(trainer.logger_connector.callback_metrics)
    assert num_callback_candidates in (7, 8)

    # only validation_step should have been flagged as called
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called
# Example #26
# 0
def test_no_auto_callbacks_with_train_loop_only(tmpdir):
    """
    Make sure early stop + checkpoint work with only a train loop

    Fits the same model with two trainers that differ only in
    ``early_stop_callback`` and checks which monitors end up configured.
    """
    # NOTE(review): this sets the variable process-wide and nothing in this
    # test restores it afterwards.
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_no_default_callbacks_for_train_loop
    model.training_epoch_end = None
    model.val_dataloader = None

    batches = 3
    epochs = 3

    # options common to both trainers built below
    shared_options = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        row_log_interval=1,
        limit_train_batches=batches,
        weights_summary=None,
    )

    # first run: no early stopping requested
    trainer = Trainer(**shared_options)
    trainer.fit(model)

    assert len(trainer.logger_connector.callback_metrics) == 1

    # one saved loss per executed training batch
    recorded_losses = trainer.dev_debugger.saved_train_losses
    assert len(recorded_losses) == batches * epochs

    assert trainer.checkpoint_callback.monitor == 'checkpoint_on'
    assert trainer.early_stop_callback is None

    # second run: early stopping explicitly enabled
    trainer = Trainer(early_stop_callback=True, **shared_options)
    trainer.fit(model)

    assert trainer.early_stop_callback.monitor == 'early_stop_on'
def test_full_training_loop_dict(tmpdir):
    """
    Checks train_step + training_step_end + training_epoch_end

    Fits a ``DeterministicModel`` for one epoch with all three training hooks
    assigned, then checks the hook "called" flags, the epoch-end metrics on
    the trainer, and the result of running one more training batch by hand.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_for_step_end_dict
    model.training_step_end = model.training_step_end_dict
    model.training_epoch_end = model.training_epoch_end_dict
    model.val_dataloader = None

    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, weights_summary=None)
    trainer.fit(model)

    # all three training hooks should have been flagged as called
    assert model.training_step_called
    assert model.training_step_end_called
    assert model.training_epoch_end_called

    # epoch-end metrics must be present on the trainer
    assert trainer.callback_metrics['epoch_end_log_1'] == 178
    assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234

    # run the first batch manually and inspect what comes back
    first_batch = next(iter(model.train_dataloader()))
    first_idx = 0

    out = trainer.run_training_batch(first_batch, first_idx)
    assert out.signal == 0
    for key, expected in (('log_acc1', 14.0), ('log_acc2', 9.0)):
        assert out.batch_log_metrics[key] == expected

    # get the output of the first optimizer
    epoch_end_outputs = out.training_step_output_for_epoch_end
    assert len(epoch_end_outputs) == 1
    step_end_output = epoch_end_outputs[0][0]
    step_pbar = step_end_output['progress_bar']
    for key, expected in (('pbar_acc1', 19.0), ('pbar_acc2', 21.0)):
        assert step_pbar[key] == expected
# Example #28
# 0
def test_training_step_scalar(tmpdir):
    """
    Tests that only training_step that returns a single scalar can be used

    After fitting, one training batch is run by hand and its output, as well
    as the result of the optimizer closure, is checked against the value 171.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_scalar_return
    model.val_dataloader = None

    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, weights_summary=None)
    trainer.fit(model)

    # only training_step should have been flagged as called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert not model.training_epoch_end_called

    # grab the first (index, batch) pair from the training dataloader
    batch_idx, batch = next(enumerate(model.train_dataloader()))

    out = trainer.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0

    # nothing logged and no grad norms, but both must still be dicts
    assert len(out.batch_log_metrics) == 0
    assert isinstance(out.batch_log_metrics, dict)
    assert len(out.grad_norm_dic) == 0
    assert isinstance(out.grad_norm_dic, dict)

    epoch_end_outputs = out.training_step_output_for_epoch_end
    assert len(epoch_end_outputs) == 1
    scalar_output = epoch_end_outputs[0][0]
    assert isinstance(scalar_output, torch.Tensor)
    assert scalar_output.item() == 171

    # make sure the optimizer closure returns the correct things
    first_optimizer = trainer.optimizers[0]
    closure_result = trainer.optimizer_closure(
        batch, batch_idx, 0, first_optimizer, trainer.hiddens)
    assert closure_result['loss'].item() == 171
def test_training_step_result_log_epoch_only(tmpdir):
    """
    Tests that only training_step can be used with TrainResult
    Makes sure that things are routed to pbar, loggers and loss accordingly

    Makes sure pbar and logs happen on epoch only when requested
    """
    # enable internal debugging actions
    # NOTE(review): this sets the variable process-wide and nothing in this
    # test restores it afterwards.
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_only
    model.training_step_end = None
    model.training_epoch_end = None
    model.val_dataloader = None

    epochs = 3
    batches = 2
    trainer_options = dict(
        default_root_dir=tmpdir,
        limit_train_batches=batches,
        limit_val_batches=batches,
        log_every_n_steps=1,
        max_epochs=epochs,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_options)
    trainer.fit(model)

    # only training_step should have been flagged as called
    assert model.training_step_called
    assert not model.training_step_end_called
    assert not model.training_epoch_end_called

    assert len(trainer.logger_connector.callback_metrics) == 11

    # exactly one logged entry per epoch, each carrying that epoch's keys
    epoch_metrics = trainer.dev_debugger.logged_metrics
    assert len(epoch_metrics) == epochs
    for epoch_idx, epoch_logged in enumerate(epoch_metrics):
        assert epoch_logged[f'epoch_log_and_pbar_acc1_e{epoch_idx}'] == 14.0
        assert epoch_logged[f'epoch_log_acc2_e{epoch_idx}'] == 15.0
        assert f'epoch_pbar_acc3_e{epoch_idx}' not in epoch_logged
        assert len(epoch_logged) == 4

    # make sure we are using the correct metrics for callbacks
    assert trainer.logger_connector.callback_metrics['checkpoint_on'] == 171

    # make sure pbar metrics are correct and log-only metrics did not leak
    pbar_metrics = trainer.logger_connector.progress_bar_metrics
    for epoch_idx in range(epochs):
        assert pbar_metrics[f'epoch_log_and_pbar_acc1_e{epoch_idx}'] == 14
        assert pbar_metrics[f'epoch_pbar_acc3_e{epoch_idx}'] == 16
        assert f'epoch_log_acc2_e{epoch_idx}' not in pbar_metrics

    # grab the first (index, batch) pair from the training dataloader
    batch_idx, batch = next(enumerate(model.train_dataloader()))

    out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0
    assert len(out.batch_log_metrics) == 0

    epoch_end_outputs = out.training_step_output_for_epoch_end
    assert len(epoch_end_outputs) == 1
    step_result = epoch_end_outputs[0][0]
    assert isinstance(step_result, TrainResult)

    current_epoch = trainer.current_epoch
    assert 'minimize' in step_result
    assert f'epoch_log_and_pbar_acc1_e{current_epoch}' in step_result
    assert f'epoch_log_acc2_e{current_epoch}' in step_result

    # make sure the optimizer closure returns the correct things
    first_optimizer = trainer.optimizers[0]
    closure_result = trainer.train_loop.training_step_and_backward(
        batch, batch_idx, 0, first_optimizer, trainer.hiddens)
    expected_loss = (42.0 * 3) + (15.0 * 3)
    assert closure_result['loss'] == expected_loss
def test_training_step_epoch_end_result(tmpdir):
    """
    Makes sure training_step and epoch_end can be used with Results (without batch_end)

    Checks the logged metrics, the progress bar metrics, the callback metrics
    and the result of running one more training batch by hand.
    """
    # NOTE(review): this sets the variable process-wide and nothing in this
    # test restores it afterwards.
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step
    model.training_epoch_end = model.training_epoch_end_return_for_log_epoch_and_step
    model.val_dataloader = None

    batches = 3
    epochs = 1
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_options)
    trainer.fit(model)

    assert len(trainer.logger_connector.callback_metrics) == 17

    # training_step and training_epoch_end flagged, training_step_end not
    assert model.training_step_called
    assert not model.training_step_end_called
    assert model.training_epoch_end_called

    # one entry per batch step plus one per epoch
    expected_entries = (epochs * batches) + epochs

    # make sure correct metrics were logged
    logged_metrics = trainer.dev_debugger.logged_metrics
    assert len(logged_metrics) == expected_entries
    final_entry = logged_metrics[-1]

    expected_final_values = (
        ('step_epoch_log_and_pbar_acc1_epoch', 210.0),
        ('step_epoch_log_acc2_epoch', 336.0),
        ('epoch_end_log_acc_epoch', 1212.0),
        ('epoch_end_log_pbar_acc_epoch', 1214.0),
    )
    for key, expected in expected_final_values:
        assert final_entry[key] == expected
    assert 'epoch_end_pbar_acc' not in final_entry

    # make sure pbar metrics are correct
    pbar_history = trainer.dev_debugger.pbar_added_metrics
    assert len(pbar_history) == expected_entries

    pbar_metrics = trainer.logger_connector.progress_bar_metrics
    expected_pbar_values = (
        ('step_epoch_log_and_pbar_acc1_epoch', 210.0),
        ('step_epoch_log_and_pbar_acc1_step', 7.0),
        ('step_epoch_pbar_acc3_epoch', 504.0),
        ('epoch_end_pbar_acc_epoch', 1213.0),
        ('epoch_end_log_pbar_acc_epoch', 1214.0),
    )
    for key, expected in expected_pbar_values:
        assert pbar_metrics[key] == expected
    for absent_key in ('epoch_end_log_acc', 'log_acc2'):
        assert absent_key not in pbar_metrics

    # make sure callback metrics didn't change
    assert trainer.logger_connector.callback_metrics['checkpoint_on'] == 171

    # -----------------------------------------
    # make sure training outputs what is expected
    # -----------------------------------------
    batch_idx, batch = next(enumerate(model.train_dataloader()))

    out = trainer.train_loop.run_training_batch(batch, batch_idx, 0)
    assert out.signal == 0
    assert len(out.batch_log_metrics) == 4

    epoch_end_outputs = out.training_step_output_for_epoch_end
    assert len(epoch_end_outputs) == 1
    step_result = epoch_end_outputs[0][0]
    assert isinstance(step_result, TrainResult)

    expected_result_keys = (
        'minimize',
        'step_epoch_log_and_pbar_acc1_step',
        'step_epoch_log_and_pbar_acc1_epoch',
        'step_epoch_log_acc2_step',
        'step_epoch_log_acc2_epoch',
    )
    for key in expected_result_keys:
        assert key in step_result

    # make sure the optimizer closure returns the correct things
    first_optimizer = trainer.optimizers[0]
    closure_result = trainer.train_loop.training_step_and_backward(
        batch, batch_idx, 0, first_optimizer, trainer.hiddens)
    expected_loss = (42.0 * 3) + (15.0 * 3)
    assert closure_result['loss'] == expected_loss