def run_in_situ(self, azure_run_info: AzureRunInfo) -> None:
    """
    Actually run the AzureML job; this method will typically run on an Azure VM.

    :param azure_run_info: Contains all information about the present run in AzureML, in particular where the
        datasets are mounted.
    """
    # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
    # build itself, but not the tons of debug information that AzureML submissions create.
    # Suppress the logging from all processes but the one for GPU 0 on each node, to make log files more readable.
    logging_to_stdout(self.azure_config.log_level if is_local_rank_zero() else "ERROR")
    package_setup_and_hacks()
    if is_global_rank_zero():
        self.print_git_tags()
    # For the PR build in AzureML, we can either run pytest, or train the simple PR model. Running both
    # only works when using DDP_spawn, but that has the side-effect of messing up the memory consumption of
    # the large models.
    if self.azure_config.pytest_mark:
        outputs_folder = Path.cwd() / fixed_paths.DEFAULT_AML_UPLOAD_DIR
        pytest_passed, results_file_path = run_pytest(self.azure_config.pytest_mark, outputs_folder)
        if not pytest_passed:
            # Terminate if pytest has failed. This makes the smoke test in PR builds fail if pytest fails.
            pytest_failures = f"Not all PyTest tests passed. See {results_file_path}"
            raise ValueError(pytest_failures)
    else:
        # Set environment variables for multi-node training if needed. This function will terminate early
        # if it detects that it is not in a multi-node environment.
        set_environment_variables_for_multi_node()
        self.ml_runner = self.create_ml_runner()
        self.ml_runner.setup(azure_run_info)
        self.ml_runner.run()
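The rank checks above (is_local_rank_zero, is_global_rank_zero) gate logging and git-tag printing to a single process. As a hedged illustration of the underlying pattern, not InnerEye's actual implementation, a local-rank check under PyTorch DDP can read the LOCAL_RANK environment variable that DDP launchers set; the *_sketch suffix marks the function as hypothetical.

import os

def is_local_rank_zero_sketch() -> bool:
    # DDP launchers set LOCAL_RANK per node; a process that never saw it is treated as a single-process run.
    return int(os.environ.get("LOCAL_RANK", "0")) == 0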
def test_rnn_classifier_via_config_2(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results = model_train(config)
    actual_train_loss = results.train_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    actual_val_loss = results.val_results_per_epoch[-1].values()[MetricType.LOSS.value][0]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
    assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
           == config.get_total_number_of_save_epochs()
    assert np.allclose(results.optimal_temperature_scale_values_per_checkpoint_epoch, [0.97], rtol=0.1)
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    model_training_result, checkpoint_handler = model_train_unittest(config, dirs=test_output_dirs)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]
    actual_train_loss = model_training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.use_mixed_precision = True
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        model_train(config,
                    get_default_checkpoint_handler(model_config=config, project_root=test_output_dirs.root_dir))
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss. binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        model_train(config,
                    get_default_checkpoint_handler(model_config=config, project_root=test_output_dirs.root_dir))
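The comment above quotes PyTorch's autocast error for sigmoid followed by BCELoss. The following standalone sketch (not part of the test) shows the unsafe pattern and the autocast-safe replacement that operates on raw logits:

import torch

logits = torch.randn(8, 1)                      # raw model outputs, no sigmoid applied
targets = torch.randint(0, 2, (8, 1)).float()   # binary labels
# Unsafe under torch.cuda.amp.autocast: torch.nn.BCELoss()(torch.sigmoid(logits), targets)
# Safe and numerically equivalent, because the sigmoid is folded into the loss:
loss = torch.nn.BCEWithLogitsLoss()(logits, targets)
print(loss.item())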
def test_run_ml_with_classification_model(test_output_dirs: OutputFolderForTests,
                                          number_of_offline_cross_validation_splits: int,
                                          model_name: str) -> None:
    """
    Test training and testing of classification models, when it is started together via run_ml.
    """
    logging_to_stdout()
    azure_config = get_default_azure_config()
    azure_config.train = True
    config: ScalarModelBase = ModelConfigLoader[ScalarModelBase]().create_model_config_from_name(model_name)
    config.number_of_cross_validation_splits = number_of_offline_cross_validation_splits
    config.set_output_to(test_output_dirs.root_dir)
    # Trying to run DDP from the test suite hangs, hence restrict to single GPU.
    config.max_num_gpus = 1
    MLRunner(config, azure_config).run()
    _check_offline_cross_validation_output_files(config)
    if config.perform_cross_validation:
        # Test that the result files can be correctly picked up by the cross validation routine.
        # For that, we point the downloader to the local results folder. The core download method
        # recognizes run_recovery_id == None as the signal to read from the local_run_results folder.
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        # One file for VAL and one for TRAIN for each child run
        assert len(result_files) == config.get_total_number_of_cross_validation_runs() * 2
        for file in result_files:
            assert file.execution_mode != ModelExecutionMode.TEST
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()
def test_rnn_classifier_via_config_2(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can build an RNN classifier that learns sequences, of the same kind as in
    test_rnn_classifier_toy_problem, but built via the config.
    """
    expected_max_train_loss = 0.71
    expected_max_val_loss = 0.71
    num_sequences = 100
    ml_util.set_random_seed(123)
    dataset_contents = "subject,index,feature,label\n"
    for subject in range(num_sequences):
        # Sequences have variable length
        sequence_length = np.random.choice([9, 10, 11, 12])
        # Each sequence is a series of 0 and 1
        inputs = np.random.choice([0, 1], size=(sequence_length,), p=[1. / 3, 2. / 3])
        label = np.sum(inputs) > (sequence_length // 2)
        for i, value in enumerate(inputs.tolist()):
            dataset_contents += f"S{subject},{i},{value},{label}\n"
    logging_to_stdout()
    config = ToySequenceModel2(should_validate=False)
    config.num_epochs = 2
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset(dataset_contents)
    results, _ = model_train_unittest(config, dirs=test_output_dirs)
    actual_train_loss = results.get_metric(is_training=True, metric_type=MetricType.LOSS.value)[-1]
    actual_val_loss = results.get_metric(is_training=False, metric_type=MetricType.LOSS.value)[-1]
    print(f"Training loss after {config.num_epochs} epochs: {actual_train_loss}")
    print(f"Validation loss after {config.num_epochs} epochs: {actual_val_loss}")
    assert actual_train_loss <= expected_max_train_loss, "Training loss too high"
    assert actual_val_loss <= expected_max_val_loss, "Validation loss too high"
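Several tests above call _get_mock_sequence_dataset without showing it. The following is a hypothetical sketch of such a helper (the real one lives in the test module and may differ; it presumably has a default dataset, since some callers pass no argument). The parsing mirrors how test_image_encoder_with_segmentation below loads an inline CSV via pd.read_csv(StringIO(...), dtype=str):

from io import StringIO

import pandas as pd

def _get_mock_sequence_dataset_sketch(dataset_contents: str) -> pd.DataFrame:
    # Keep all columns as strings; the config's pre-processing does its own type conversions.
    return pd.read_csv(StringIO(dataset_contents), sep=",", dtype=str)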
def main() -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    report_structure_extremes(config.dataset, config.yaml_file)
def test_rnn_classifier_via_config_1(use_combined_model: bool,
                                     imaging_feature_type: ImagingFeatureType,
                                     combine_hidden_state: bool,
                                     use_encoder_layer_norm: bool,
                                     use_mean_teacher_model: bool,
                                     test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can build a simple RNN model that only feeds off non-image features.
    This just tests the mechanics of training, but not if the model learned.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model,
                              imaging_feature_type=imaging_feature_type,
                              combine_hidden_states=combine_hidden_state,
                              use_encoder_layer_norm=use_encoder_layer_norm,
                              use_mean_teacher_model=use_mean_teacher_model,
                              should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        results = model_train(config)
    assert len(results.optimal_temperature_scale_values_per_checkpoint_epoch) \
           == config.get_total_number_of_save_epochs()
def run(self) -> Tuple[Optional[DeepLearningConfig], Optional[Run]]:
    """
    The main entry point for training and testing models from the commandline. This chooses a model to train
    via a commandline argument, runs training or testing, and writes all required info to disk and logs.

    :return: If submitting to AzureML, returns the model configuration that was used for training,
        including commandline overrides applied (if any).
    """
    # Usually, when we set logging to DEBUG, we want diagnostics about the model
    # build itself, but not the tons of debug information that AzureML submissions create.
    logging_to_stdout(logging.INFO if is_local_rank_zero() else "ERROR")
    initialize_rpdb()
    user_agent.append(azure_util.INNEREYE_SDK_NAME, azure_util.INNEREYE_SDK_VERSION)
    self.parse_and_load_model()
    if self.lightning_container.perform_cross_validation:
        if self.model_config is None:
            raise NotImplementedError("Cross validation for LightningContainer models is not yet supported.")
        # Force Hyperdrive usage if performing cross validation
        self.azure_config.hyperdrive = True
    run_object: Optional[Run] = None
    if self.azure_config.azureml:
        run_object = self.submit_to_azureml()
    else:
        self.run_in_situ()
    if self.model_config is None:
        return self.lightning_container, run_object
    return self.model_config, run_object
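As a hedged usage sketch of this entry point: a small driver script can construct a Runner and call run(). The constructor arguments here are assumptions taken from the Runner(...) call in test_create_ml_runner_args further below, not a documented API:

from pathlib import Path

# Hypothetical driver; argument names follow the Runner(...) call in test_create_ml_runner_args.
runner = Runner(project_root=Path.cwd(), yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
model_config, azure_run = runner.run()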
def test_run_ml_with_sequence_model(use_combined_model: bool,
                                    imaging_feature_type: ImagingFeatureType,
                                    test_output_dirs: TestOutputDirectories) -> None:
    """
    Test training and testing of sequence models, when it is started together via run_ml.
    """
    logging_to_stdout()
    config = ToySequenceModel(use_combined_model, imaging_feature_type,
                              should_validate=False, sequence_target_positions=[2, 10])
    config.set_output_to(test_output_dirs.root_dir)
    config.dataset_data_frame = _get_mock_sequence_dataset()
    config.num_epochs = 1
    config.max_batch_grad_cam = 1
    # Make sure we are testing with at least one sequence position that will not exist,
    # to ensure correct handling of sequences that do not contain all the expected target positions
    assert max(config.sequence_target_positions) > \
           config.dataset_data_frame[config.sequence_column].astype(float).max()
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.random.uniform(0, 1, SCAN_SIZE),
                                                      segmentations=np.random.randint(0, 2, SCAN_SIZE))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        azure_config = get_default_azure_config()
        azure_config.train = True
        MLRunner(config, azure_config).run()
def test_compare_folder_against_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in an AML run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / \
            FINAL_MODEL_FOLDER / MODEL_INFERENCE_JSON_FILE_NAME
    create_folder_and_write_text(file1,
                                 '{"model_name": "BasicModel2Epochs", "checkpoint_paths": ['
                                 '"checkpoints/last.ckpt"], '
                                 '"model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}')
    with mock.patch("InnerEye.ML.baselines_util.RUN_CONTEXT", run):
        # First comparison, only on the .json file, should pass
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison that failed previously, without mocking the RUN_CONTEXT. This should now
    # realize that the present run is an offline run, and skip the comparison
    compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                    csv_relative_tolerance=0.0)
def test_is_completed_single_run() -> None:
    """
    Test if we can correctly check run status for a single run.
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    get_run_and_check(get_most_recent_run_id(), True, workspace)
def test_model_config_loader() -> None:
    logging_to_stdout(log_level=logging.DEBUG)
    default_loader = get_model_loader()
    assert default_loader.create_model_config_from_name("BasicModel2Epochs") is not None
    with pytest.raises(ValueError):
        default_loader.create_model_config_from_name("DummyModel")
    loader_including_tests = get_model_loader(namespace="Tests.ML.configs")
    assert loader_including_tests.create_model_config_from_name("BasicModel2Epochs") is not None
    assert loader_including_tests.create_model_config_from_name("DummyModel") is not None
def test_create_ml_runner_args(is_default_namespace: bool,
                               test_output_dirs: TestOutputDirectories,
                               is_offline_run: bool) -> None:
    """
    Test round trip parsing of commandline arguments: From arguments to the Azure runner to the arguments of the
    ML runner, checking that whatever is passed on can be correctly parsed.
    """
    logging_to_stdout()
    model_name = "Lung"
    outputs_folder = Path(test_output_dirs.root_dir)
    project_root = fixed_paths.repository_root_directory()
    if is_default_namespace:
        model_configs_namespace = None
    else:
        model_configs_namespace = "Tests.ML.configs"
        model_name = "DummyModel"
    args_list = [f"--model={model_name}", "--train=True", "--l_rate=100.0",
                 "--norm_method=Simple Norm",
                 "--subscription_id", "Test1",
                 "--tenant_id=Test2",
                 "--application_id", "Test3",
                 "--datasets_storage_account=Test4",
                 "--datasets_container", "Test5",
                 "--pytest_mark", "gpu",
                 f"--output_to={outputs_folder}"]
    if not is_default_namespace:
        args_list.append(f"--model_configs_namespace={model_configs_namespace}")
    with mock.patch("sys.argv", [""] + args_list):
        with mock.patch("InnerEye.ML.deep_learning_config.is_offline_run_context", return_value=is_offline_run):
            runner = Runner(project_root=project_root, yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
            runner.parse_and_load_model()
            azure_config = runner.azure_config
            model_config = runner.model_config
            assert azure_config.datasets_storage_account == "Test4"
            assert azure_config.model == model_name
            assert model_config.l_rate == 100.0
            assert model_config.norm_method == PhotometricNormalizationMethod.SimpleNorm
            if is_offline_run:
                # The actual output folder must be a subfolder of the folder given on the commandline. The folder
                # will contain a timestamp that starts with the year number, hence will start with 20...
                assert str(model_config.outputs_folder).startswith(str(outputs_folder / "20"))
                assert model_config.logs_folder == (model_config.outputs_folder / DEFAULT_LOGS_DIR_NAME)
            else:
                # For runs inside AzureML, the output folder is the project root (the root of the folders that are
                # included in the snapshot). The "output_to" argument will be ignored.
                assert model_config.outputs_folder == (project_root / DEFAULT_AML_UPLOAD_DIR)
                assert model_config.logs_folder == (project_root / DEFAULT_LOGS_DIR_NAME)
            assert not hasattr(model_config, "datasets_storage_account")
            assert azure_config.pytest_mark == "gpu"
def test_config_loader_on_lightning_container() -> None:
    """
    Test if the config loader can load a model that is neither classification nor segmentation.
    """
    # First test if the container can be instantiated at all (it is tricky to get that right when the
    # inheritance changes)
    DummyContainerWithParameters()
    logging_to_stdout(log_level=logging.DEBUG)
    model = model_loader_including_tests.create_model_config_from_name("DummyContainerWithParameters")
    assert model is not None
def test_is_completed_ensemble_run() -> None:
    """
    Test if we can correctly check run status and status of child runs for an ensemble run.
    """
    logging_to_stdout()
    workspace = get_default_workspace()
    run_id = get_most_recent_run_id(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    get_run_and_check(run_id, True, workspace)
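The two tests above call a three-argument get_run_and_check helper that is not shown here. The following is a hypothetical sketch, modeled on the local two-argument helper defined inside test_is_completed further below; the real helper may differ:

from azureml.core import Workspace

def get_run_and_check_sketch(run_id: str, expected: bool, workspace: Workspace) -> None:
    # Fetch the run and assert that it, including any child runs, reached the expected completion status.
    run = fetch_run(workspace, run_id)
    assert is_run_and_child_runs_completed(run) == expected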
def main(settings_yaml_file: Optional[Path] = None, project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    azure_config = AzureConfig.from_yaml(yaml_file_path=settings_yaml_file or config.settings,
                                         project_root=project_root)
    report_structure_extremes(config.dataset, azure_config)
def main(args: Optional[List[str]] = None, project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    inference_config = SubmitForInferenceConfig.parse_args(args)
    settings = inference_config.settings or fixed_paths.SETTINGS_YAML_FILE
    azure_config = AzureConfig.from_yaml(settings, project_root=project_root)
    if inference_config.cluster:
        azure_config.cluster = inference_config.cluster
    submit_for_inference(inference_config, azure_config)
def get_configs(default_model_config: SegmentationModelBase,
                yaml_file_path: Path) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader().create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
def plot_cross_validation(config: PlotCrossValidationConfig) -> Path:
    """
    Collects results from an AzureML cross validation run, and writes aggregate metrics files.

    :param config: The settings for plotting cross validation results.
    :return: The path with all cross validation result files.
    """
    logging_to_stdout(logging.INFO)
    with logging_section("Downloading cross-validation results"):
        result_files, root_folder = download_crossval_result_files(config)
    config_and_files = OfflineCrossvalConfigAndFiles(config=config, files=result_files)
    with logging_section("Plotting cross-validation results"):
        plot_cross_validation_from_files(config_and_files, root_folder)
    return root_folder
def run_in_situ(self) -> None:
    """
    Actually run the AzureML job; this method will typically run on an Azure VM.
    """
    # Only set the logging level now. Usually, when we set logging to DEBUG, we want diagnostics about the model
    # build itself, but not the tons of debug information that AzureML submissions create.
    logging_to_stdout(self.azure_config.log_level)
    suppress_logging_noise()
    pytest_failed = False
    training_failed = False
    pytest_passed = True
    # Ensure that model training and pytest both get executed in all cases, so that we see a full set of
    # test results in each PR
    outputs_folder = self.model_config.outputs_folder
    try:
        logging_to_file(self.model_config.logs_folder / LOG_FILE_NAME)
        try:
            self.create_ml_runner().run()
        except Exception as ex:
            print_exception(ex, "Model training/testing failed.")
            training_failed = True
        if self.azure_config.pytest_mark:
            try:
                pytest_passed, results_file_path = run_pytest(self.azure_config.pytest_mark, outputs_folder)
                if not pytest_passed:
                    logging.error(f"Not all PyTest tests passed. See {results_file_path}")
            except Exception as ex:
                print_exception(ex, "Unable to run PyTest.")
                pytest_failed = True
    finally:
        # Wait for aggregation if required, and only if the training actually succeeded.
        if not training_failed and self.model_config.should_wait_for_other_cross_val_child_runs():
            self.wait_for_cross_val_runs_to_finish_and_aggregate()
        disable_logging_to_file()
    message = []
    if training_failed:
        message.append("Training failed")
    if pytest_failed:
        message.append("Unable to run Pytest")
    if not pytest_passed:
        message.append("At least 1 test in Pytest failed")
    # Terminate if pytest or model training has failed. This makes the smoke test in
    # PR builds fail if pytest fails.
    if message:
        raise ValueError(f"One component of the training pipeline failed: {'. '.join(message)}")
def test_compare_folder_against_parent_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can compare that a set of files exists in the parent run of an AML run.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    parent_run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    file1 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / \
            CROSSVAL_RESULTS_FOLDER / "Test_outliers.txt"
    create_folder_and_write_text(file1, """
=== METRIC: Dice ===
No outliers found
=== METRIC: HausdorffDistance_mm ===
No outliers found""")
    with mock.patch("InnerEye.ML.baselines_util.PARENT_RUN_CONTEXT", parent_run):
        # No plain files to compare. The file Test_outliers.txt should be compared and found to match.
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
        create_folder_and_write_text(file1, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.CONTENTS_MISMATCH}: {CROSSVAL_RESULTS_FOLDER}/{file1.name}" in message
        # Now add a file to the set of expected files that does not exist in the run: comparison should now fail
        no_such_file = "no_such_file.txt"
        file2 = test_output_dirs.root_dir / REGRESSION_TEST_AZUREML_PARENT_FOLDER / no_such_file
        create_folder_and_write_text(file2, "foo")
        with pytest.raises(ValueError) as ex:
            compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                            csv_relative_tolerance=0.0)
        message = ex.value.args[0].splitlines()
        assert f"{baselines_util.MISSING_FILE}: {no_such_file}" in message
    # Now run the same comparison without mocking the PARENT_RUN_CONTEXT. This should now
    # realize that the present run is a crossval child run
    with pytest.raises(ValueError) as ex:
        compare_folders_and_run_outputs(expected=test_output_dirs.root_dir, actual=Path.cwd(),
                                        csv_relative_tolerance=0.0)
    assert "no (parent) run to compare against" in str(ex)
def test_run_ml_with_multi_label_sequence_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of sequence models that predict at multiple time points,
    when it is started via run_ml.
    """
    logging_to_stdout()
    config = ToyMultiLabelSequenceModel(should_validate=False)
    assert config.get_target_indices() == [1, 2, 3]
    expected_prediction_targets = [f"{SEQUENCE_POSITION_HUE_NAME_PREFIX} {x}" for x in ["01", "02", "03"]]
    _target_indices = config.get_target_indices()
    assert _target_indices is not None
    assert len(_target_indices) == len(expected_prediction_targets)
    metrics_dict = create_metrics_dict_for_scalar_models(config)
    assert metrics_dict.get_hue_names(include_default=False) == expected_prediction_targets
    config.set_output_to(test_output_dirs.root_dir)
    # Create a fake dataset directory to make config validation pass
    config.local_dataset = test_output_dirs.root_dir
    config.dataset_data_frame = _get_multi_label_sequence_dataframe()
    config.pre_process_dataset_dataframe()
    config.num_epochs = 1
    config.max_batch_grad_cam = 1
    azure_config = get_default_azure_config()
    azure_config.train = True
    MLRunner(config, azure_config).run()
    # The metrics file should have one entry per epoch per subject per prediction target,
    # for all the 3 prediction targets.
    metrics_file = config.outputs_folder / "Train" / SUBJECT_METRICS_FILE_NAME
    assert metrics_file.exists()
    metrics = pd.read_csv(metrics_file)
    assert LoggingColumns.Patient.value in metrics
    assert LoggingColumns.Epoch.value in metrics
    assert LoggingColumns.Hue.value in metrics
    assert metrics[LoggingColumns.Hue.value].unique().tolist() == expected_prediction_targets
    group_by_subject = metrics.groupby(by=[LoggingColumns.Patient.value, LoggingColumns.Epoch.value])
    expected_prediction_target_lengths = [3, 2, 3, 3]
    for i, x in enumerate(group_by_subject):
        assert len(x[1]) == expected_prediction_target_lengths[i]
    group_by_subject_and_target = metrics.groupby(by=[LoggingColumns.Patient.value,
                                                      LoggingColumns.Epoch.value,
                                                      LoggingColumns.Hue.value])
    for _, group in group_by_subject_and_target:
        assert len(group) == 1
def test_train_2d_classification_model(test_output_dirs: OutputFolderForTests,
                                       use_mixed_precision: bool) -> None:
    """
    Test training and testing of 2d classification models.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting2D()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    model_training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.705931, 0.698664, 0.694489, 0.693151]
    expected_val_loss = [1.078517, 1.140510, 1.199026, 1.248595]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    actual_learning_rates = list(flatten(model_training_result.learning_rates_per_epoch))
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs
def plot_cross_validation(config: PlotCrossValidationConfig) -> Path:
    """
    Collects results from an AzureML cross validation run, and writes aggregate metrics files.

    :param config: The settings for plotting cross validation results.
    :return: The path with all cross validation result files.
    """
    logging_to_stdout(logging.INFO)
    with logging_section("Downloading cross-validation results"):
        result_files, root_folder = download_crossval_result_files(config)
    config_and_files = OfflineCrossvalConfigAndFiles(config=config, files=result_files)
    with logging_section("Plotting cross-validation results"):
        plot_cross_validation_from_files(config_and_files, root_folder)
    return root_folder
def test_compare_files_text(test_output_dirs: OutputFolderForTests, file_extension: str) -> None:
    """
    Checks the basic code to compare the contents of two text files.

    :param test_output_dirs: The test fixture that provides an output folder for the test.
    :param file_extension: The extension of the file to create.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    expected = test_output_dirs.root_dir / f"expected{file_extension}"
    actual = test_output_dirs.root_dir / "actual.does_not_matter"
    # Make sure that we test different line endings - the files should still match
    create_folder_and_write_text(expected, "Line1\r\nLine2")
    create_folder_and_write_text(actual, "Line1\nLine2")
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_text("does_not_match")
    assert compare_files(expected=expected, actual=actual) == baselines_util.CONTENTS_MISMATCH
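The following is a minimal sketch of a line-ending-tolerant text comparison matching the behavior this test asserts; it is an illustration, not InnerEye's actual compare_files, and the literal mismatch string stands in for baselines_util.CONTENTS_MISMATCH:

from pathlib import Path

def compare_text_files_sketch(expected: Path, actual: Path) -> str:
    # splitlines() treats \n, \r\n and \r alike, so "Line1\r\nLine2" matches "Line1\nLine2".
    if expected.read_text().splitlines() == actual.read_text().splitlines():
        return ""  # empty string signals a match, as the test asserts
    return "Contents mismatch"  # placeholder for baselines_util.CONTENTS_MISMATCH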
def test_image_encoder_with_segmentation(test_output_dirs: OutputFolderForTests,
                                         encode_channels_jointly: bool,
                                         aggregation_type: AggregationType,
                                         imaging_feature_type: ImagingFeatureType) -> None:
    """
    Test if the image encoder networks can be trained on segmentations from HDF5.
    """
    logging_to_stdout()
    set_random_seed(0)
    scan_size = (6, 64, 60)
    dataset_contents = """subject,channel,path,label
S1,week0,scan1.h5,
S1,week1,scan2.h5,True
S2,week0,scan3.h5,
S2,week1,scan4.h5,False
S3,week0,scan5.h5,
S3,week1,scan6.h5,True
S4,week0,scan7.h5,
S4,week1,scan8.h5,True
"""
    config = ImageEncoder(encode_channels_jointly=encode_channels_jointly,
                          imaging_feature_type=imaging_feature_type,
                          should_validate=False,
                          aggregation_type=aggregation_type,
                          scan_size=scan_size)
    # This fails with 16bit precision, saying "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are
    # unsafe to autocast. Many models use a sigmoid layer right before the binary cross entropy layer. In this case,
    # combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits or
    # torch.nn.BCEWithLogitsLoss. binary_cross_entropy_with_logits and BCEWithLogits are safe to autocast."
    config.use_mixed_precision = False
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = 1
    config.local_dataset = Path()
    config.dataset_data_frame = pd.read_csv(StringIO(dataset_contents), sep=",", dtype=str)
    # Patch the load_images function that will be called once we access a dataset item
    image_and_seg = ImageAndSegmentations[np.ndarray](images=np.zeros(scan_size, dtype=np.float32),
                                                      segmentations=np.ones(scan_size, dtype=np.uint8))
    with mock.patch('InnerEye.ML.utils.io_util.load_image_in_known_formats', return_value=image_and_seg):
        azure_config = get_default_azure_config()
        azure_config.train = True
        MLRunner(config, azure_config).run()
def test_is_completed() -> None:
    """
    Test if we can correctly check run status and status of child runs.
    """
    logging_to_stdout()
    workspace = get_default_workspace()

    def get_run_and_check(run_id: str, expected: bool) -> None:
        run = fetch_run(workspace, run_id)
        status = is_run_and_child_runs_completed(run)
        assert status == expected

    get_run_and_check(DEFAULT_RUN_RECOVERY_ID, True)
    get_run_and_check(DEFAULT_ENSEMBLE_RUN_RECOVERY_ID, True)
    # This Hyperdrive run has 1 failing child run; the parent run completed successfully.
    get_run_and_check("refs_pull_326_merge:HD_d123f042-ca58-4e35-9a64-48d71c5f63a7", False)
def test_compare_files_binary(test_output_dirs: OutputFolderForTests, file_extension: str) -> None:
    """
    Checks the comparison of files that are not recognized as text files, for example images.

    :param test_output_dirs: The test fixture that provides an output folder for the test.
    :param file_extension: The extension of the file to create.
    """
    logging_to_stdout(log_level=logging.DEBUG)
    expected = test_output_dirs.root_dir / f"expected{file_extension}"
    actual = test_output_dirs.root_dir / "actual.does_not_matter"
    data1 = bytes([1, 2, 3])
    data2 = bytes([4, 5, 6])
    expected.write_bytes(data1)
    actual.write_bytes(data1)
    assert compare_files(expected=expected, actual=actual) == ""
    actual.write_bytes(data2)
    assert compare_files(expected=expected, actual=actual) == baselines_util.CONTENTS_MISMATCH
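A companion sketch for the binary case exercised here (again illustrative, not the real implementation): files that are not recognized as text are compared byte for byte.

from pathlib import Path

def compare_binary_files_sketch(expected: Path, actual: Path) -> str:
    # Empty string means the files match; any byte difference counts as a contents mismatch.
    return "" if expected.read_bytes() == actual.read_bytes() else "Contents mismatch"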