def test_compare_folder_against_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in an AML run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / \
            FINAL_MODEL_FOLDER / MODEL_INFERENCE_JSON_FILE_NAME
    create_folder_and_write_text(
        file1,
        '{"model_name": "BasicModel2Epochs", "checkpoint_paths": ['
        '"checkpoints/last.ckpt"], '
        '"model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}')
    with mock.patch("InnerEye.ML.baselines_util.RUN_CONTEXT", run):
        # First comparison only on the .json file should pass
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison that failed previously, without mocking the RUN_CONTEXT. This should now
    # realize that the present run is an offline run, and skip the comparison
    compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                    actual=Path.cwd(),
                                    csv_relative_tolerance=0.0)


def test_is_cross_validation_child_run_single_run() -> None:
    """
    Test that cross validation child runs are identified correctly. A single run should not be identified
    as a cross validation child run.
    """
    run = get_most_recent_run()
    # check for offline run
    assert not is_cross_validation_child_run(Run.get_context())
    # check for online runs
    assert not is_cross_validation_child_run(run)


def test_get_cross_validation_split_index_single_run() -> None:
    """
    Test that the retrieved cross validation split index is as expected, for single runs.
    """
    run = get_most_recent_run()
    # check for offline run
    assert get_cross_validation_split_index(Run.get_context()) == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX
    # check for online runs
    assert get_cross_validation_split_index(run) == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX


def test_download_recovery_single_run(test_output_dirs: OutputFolderForTests,
                                      runner_config: AzureConfig) -> None:
    """
    Test that all checkpoints of a single run can be downloaded, and that exactly one recovery
    and one best checkpoint are present.
    """
    output_dir = test_output_dirs.root_dir
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    run_recovery = RunRecovery.download_all_checkpoints_from_run(config, run)
    # This fails if there is no recovery checkpoint
    check_single_checkpoint(run_recovery.get_recovery_checkpoint_paths())
    check_single_checkpoint(run_recovery.get_best_checkpoint_paths())


def test_is_cross_validation_child_run_ensemble_run() -> None:
    """
    Test that cross validation child runs are identified correctly: the ensemble parent run is not
    a child run, but all of its child runs are.
    """
    # check for offline run
    assert not is_cross_validation_child_run(Run.get_context())
    # check for online runs
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    assert not is_cross_validation_child_run(run)
    assert all([is_cross_validation_child_run(x) for x in fetch_child_runs(run)])


def test_download_checkpoints_from_hyperdrive_child_runs(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that the checkpoints of all HyperDrive child runs are downloaded into per-child subfolders
    and reported as the best checkpoints.
    """
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    hyperdrive_run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    checkpoint_handler.download_checkpoints_from_hyperdrive_child_runs(hyperdrive_run)
    expected_checkpoints = [config.checkpoint_folder / OTHER_RUNS_SUBDIR_NAME / str(i) /
                            LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
                            for i in range(2)]
    checkpoint_paths = checkpoint_handler.get_best_checkpoints()
    assert checkpoint_paths
    assert len(checkpoint_paths) == 2
    assert set(expected_checkpoints) == set(checkpoint_paths)


def test_download_best_checkpoints_ensemble_run(test_output_dirs: OutputFolderForTests,
                                                runner_config: AzureConfig) -> None:
    """
    Test that the best checkpoints of all child runs of an ensemble run can be downloaded,
    one folder per child run.
    """
    output_dir = test_output_dirs.root_dir
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    run_recovery = RunRecovery.download_best_checkpoints_from_child_runs(config, run)
    other_runs_folder = config.checkpoint_folder / OTHER_RUNS_SUBDIR_NAME
    assert other_runs_folder.is_dir()
    for child in ["0", "1"]:
        assert (other_runs_folder / child).is_dir(), "Child run folder does not exist"
    for checkpoint in run_recovery.get_best_checkpoint_paths():
        assert checkpoint.is_file(), f"File {checkpoint} does not exist"


def test_compare_folder_against_parent_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in the parent run of an AML ensemble run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    parent_run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / \
            CROSSVAL_RESULTS_FOLDER / "Test_outliers.txt"
    create_folder_and_write_text(
        file1, """
=== METRIC: Dice ===
No outliers found
=== METRIC: HausdorffDistance_mm ===
No outliers found""")
    with mock.patch("InnerEye.ML.baselines_util.PARENT_RUN_CONTEXT", parent_run):
        # No plain files to compare. The file Test_outliers.txt should be compared and found to match.
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        create_folder_and_write_text(file1, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.CONTENTS_MISMATCH}: {CROSSVAL_RESULTS_FOLDER}/{file1.name}" in message
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                            actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison without mocking the PARENT_RUN_CONTEXT. This should now
    # realize that there is no parent run to compare against, and raise an error.
    with pytest.raises(ValueError) as ex:
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir,
                                        actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
    assert "no (parent) run to compare against" in str(ex)


def test_get_cross_validation_split_index_ensemble_run() -> None:
    """
    Test that the retrieved cross validation split index is as expected, for ensembles.
    """
    # check for offline run
    assert get_cross_validation_split_index(Run.get_context()) == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX
    # check for online runs
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    assert get_cross_validation_split_index(run) == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX
    assert all([get_cross_validation_split_index(x) > DEFAULT_CROSS_VALIDATION_SPLIT_INDEX
                for x in fetch_child_runs(run)])