# Used as a pytest fixture rather than a test: it returns a config for other tests to consume.
@pytest.fixture
def test_config_comparison() -> PlotCrossValidationConfig:
    """Fixture: cross-validation plotting config with a comparison run, based on the most recent run."""
    return PlotCrossValidationConfig(
        run_recovery_id=get_most_recent_run_id() + "_0",
        epoch=1,
        comparison_run_recovery_ids=[get_most_recent_run_id() + "_1"],
        model_category=ModelCategory.Segmentation
    )
def test_get_checkpoints_to_test_single_run(test_output_dirs: OutputFolderForTests) -> None:
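    """
    Test that get_checkpoints_to_test returns the "last" checkpoint in the current run's
    checkpoint folder for a single (non-ensemble) run, both before and after that file exists.
    """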
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # Set a run recovery ID and mark additional training as done: checkpoints to test should then
    # come from the current run's checkpoint folder
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id

    checkpoint_handler.additional_training_done()
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()

    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX

    # Create the checkpoint file to make it look like training has happened
    expected_checkpoint = config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    expected_checkpoint.touch()
    checkpoint_and_paths = checkpoint_handler.get_checkpoints_to_test()

    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0] == expected_checkpoint
def test_download_checkpoints_from_single_run(
        test_output_dirs: OutputFolderForTests) -> None:
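    """
    Test that recovery checkpoints for a single (non-ensemble) run are downloaded into a
    subfolder of the checkpoint folder that is named after the run.
    """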
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)

    # No checkpoint handling options set.
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    run_recovery_id = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # Set a run recovery ID for a single (non-ensemble) run
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert checkpoint_handler.run_recovery

    expected_checkpoint_root = config.checkpoint_folder / run_recovery_id.split(":")[1]
    expected_paths = [
        create_recovery_checkpoint_path(path=expected_checkpoint_root),
        expected_checkpoint_root / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    ]
    assert checkpoint_handler.run_recovery.checkpoints_roots == [
        expected_checkpoint_root
    ]
    for path in expected_paths:
        assert path.is_file()
def test_is_completed_single_run() -> None:
    """
    Test if we can correctly check run status for a single run.
    :return:
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    get_run_and_check(get_most_recent_run_id(), True, workspace)
def test_is_completed_ensemble_run() -> None:
    """
    Test if we can correctly check run status and status of child runs for an ensemble run.
    :return:
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    run_id = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    get_run_and_check(run_id, True, workspace)
def test_get_comparison_data(test_output_dirs: OutputFolderForTests) -> None:
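    """
    Test that comparison baseline data can be downloaded from the best-epoch outputs of the
    most recent run.
    """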
    azure_config = get_default_azure_config()
    comparison_name = "DefaultName"
    comparison_path = get_most_recent_run_id() + \
                      f"/{DEFAULT_AML_UPLOAD_DIR}/{BEST_EPOCH_FOLDER_NAME}/{ModelExecutionMode.TEST.value}"
    baselines = get_comparison_baselines(test_output_dirs.root_dir,
                                         azure_config,
                                         [(comparison_name, comparison_path)])
    assert len(baselines) == 1
    assert baselines[0].name == comparison_name
def test_download_recovery_checkpoints_from_ensemble_run(test_output_dirs: OutputFolderForTests) -> None:
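    """
    Test that downloading recovery checkpoints raises a ValueError when the run recovery ID
    points to an ensemble run that has child runs.
    """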
    config = ModelConfigBase(should_validate=False)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.download_recovery_checkpoints_or_weights()
    assert "has child runs" in str(ex)
def test_get_best_checkpoint_single_run(
        test_output_dirs: OutputFolderForTests) -> None:
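    """
    Test that get_best_checkpoint fails without run recovery or training, returns the checkpoint
    downloaded via run recovery, and prefers a checkpoint written by the current run once
    training has happened.
    """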
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)

    # We have not set a run_recovery, nor have we trained, so this should fail to get a checkpoint
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_best_checkpoint()
    assert "no run recovery object provided and no training has been done in this run" in ex.value.args[0]

    run_recovery_id = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    # We have set a run_recovery_id now, so this should work: all checkpoints that are available in the run
    # should be downloaded into a subfolder of the checkpoint folder
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()
    expected_checkpoint = config.checkpoint_folder / run_recovery_id.split(":")[1] \
                          / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert expected_checkpoint == checkpoint_paths[0]

    # From now on, the checkpoint handler will think that the run was resumed from epoch 1. It should pick up
    # the best checkpoint from the current run, and fall back to the checkpoint from run recovery if no
    # checkpoints have been written in the resumed run.
    checkpoint_handler.additional_training_done()
    # Go back to the non-ensemble run recovery
    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    config.start_epoch = 1
    # There is no checkpoint in the current run - use the one from run_recovery
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    expected_checkpoint = config.checkpoint_folder / run_recovery_id.split(":")[1] \
                          / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert checkpoint_paths[0] == expected_checkpoint

    # Create a checkpoint in the current run's checkpoint folder, to make it look like training has happened
    # and a better checkpoint has been written
    expected_checkpoint = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    expected_checkpoint.touch()
    checkpoint_paths = checkpoint_handler.get_best_checkpoint()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 1
    assert expected_checkpoint == checkpoint_paths[0]
def test_get_recovery_path_train_single_run(test_output_dirs: OutputFolderForTests) -> None:
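    """
    Test that, after downloading recovery checkpoints for a single run, training resumes from
    the recovery checkpoint in the checkpoint folder.
    """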
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)

    run_recovery_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    # After downloading the recovery checkpoint, training should resume from the checkpoint in the checkpoint folder
    expected_path = get_recovery_checkpoint_path(path=config.checkpoint_folder)
    assert checkpoint_handler.get_recovery_or_checkpoint_path_train() == expected_path
def test_add_comparison_data() -> None:
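    """
    Test that metrics from a comparison run can be merged with the cross-validation metrics,
    and that the focus split is reported correctly.
    """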
    fallback_run = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    crossval_config = PlotCrossValidationConfig(
        run_recovery_id=fallback_run + "_0",
        epoch=1,
        comparison_run_recovery_ids=[fallback_run + "_1"],
        model_category=ModelCategory.Segmentation)
    crossval_config.epoch = 2
    metrics_df, root_folder = download_metrics(crossval_config)
    initial_metrics = pd.concat(list(metrics_df.values()))
    all_metrics, focus_splits = add_comparison_data(crossval_config,
                                                    initial_metrics)
    focus_split = crossval_config.run_recovery_id
    comparison_split = crossval_config.comparison_run_recovery_ids[0]
    assert focus_splits == [focus_split]
    assert set(all_metrics.split) == {focus_split, comparison_split}
def test_get_recovery_path_train_single_run(
        test_output_dirs: OutputFolderForTests) -> None:
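    """
    Test that resuming training from a recovered single run fails when no start epoch is set,
    and returns the recovery checkpoint path from the run's subfolder once start_epoch is set.
    """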
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)

    run_recovery_id = get_most_recent_run_id(
        fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)

    checkpoint_handler.azure_config.run_recovery_id = run_recovery_id
    checkpoint_handler.download_recovery_checkpoints_or_weights()

    # We have not set a start_epoch, but we are trying to use run_recovery, so this should fail
    with pytest.raises(ValueError) as ex:
        checkpoint_handler.get_recovery_path_train()
    assert "Run recovery set, but start epoch is 0" in ex.value.args[0]

    # Run recovery with start epoch provided should succeed
    config.start_epoch = 20
    expected_path = create_recovery_checkpoint_path(
        path=config.checkpoint_folder / run_recovery_id.split(":")[1])
    assert checkpoint_handler.get_recovery_path_train() == expected_path
# Used as a pytest fixture rather than a test: it returns a config for other tests to consume.
@pytest.fixture
def test_config() -> PlotCrossValidationConfig:
    """Fixture: cross-validation plotting config based on the most recent run."""
    return PlotCrossValidationConfig(run_recovery_id=get_most_recent_run_id(),
                                     epoch=1,
                                     model_category=ModelCategory.Segmentation)