def plot_cross_validation_and_upload_results(self) -> Path:
    """
    Aggregate the results of the cross validation splits and upload them to the parent run.

    The plotting configuration is derived from the model configuration of this runner. If a
    post-cross-validation hook is set, it is called with the model configuration and the folder that holds
    the aggregated results. For scalar models, the unrolled aggregate metrics are also logged to the
    parent run; any failure in that step is reported as a warning and does not abort the method.

    :return: The folder that contains the aggregated cross validation results.
    """
    from InnerEye.ML.visualizers.plot_cross_validation import (crossval_config_from_model_config,
                                                               plot_cross_validation,
                                                               unroll_aggregate_metrics)
    # The cross validation splits are ready at this point, hence aggregation can run.
    crossval_config = crossval_config_from_model_config(self.model_config)
    crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
    crossval_config.outputs_directory = self.model_config.outputs_folder
    crossval_config.settings_yaml_file = self.yaml_config_file
    results_root = plot_cross_validation(crossval_config)

    hook = self.post_cross_validation_hook
    if hook:
        hook(self.model_config, results_root)

    # Upload results to the parent run's outputs. Normally, we use blobxfer for that, but here we want
    # to ensure that the files are visible inside the AzureML UI.
    PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER, path=str(results_root))

    if self.model_config.is_scalar_model:
        try:
            aggregates_csv = results_root / METRICS_AGGREGATES_FILE
            for metric in unroll_aggregate_metrics(pd.read_csv(aggregates_csv)):
                PARENT_RUN_CONTEXT.log(metric.metric_name, metric.metric_value)
        except Exception as ex:
            # Logging metrics is best-effort: report the problem, but still return the results folder.
            print_exception(ex, "Unable to log metrics to Hyperdrive parent run.", logger_fn=logging.warning)
    return results_root
def segmentation_model_test(config: SegmentationModelBase,
                            data_split: ModelExecutionMode,
                            checkpoint_handler: CheckpointHandler,
                            model_proc: ModelProcessing = ModelProcessing.DEFAULT) -> InferenceMetricsForSegmentation:
    """
    Run segmentation model testing for every checkpoint that the checkpoint handler offers for testing.
    For each checkpoint, the dataset files are written to the epoch results folder, the model is tested via
    segmentation_model_test_epoch, and the mean Dice score across images is stored per epoch.

    :param config: The arguments object which has a valid random seed attribute.
    :param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed.
    :param checkpoint_handler: Checkpoint handler object to find checkpoint paths for model initialization
    :param model_proc: whether we are testing an ensemble or single model
    :return: InferenceMetric object that contains metrics related for all of the checkpoint epochs.
    :raises ValueError: If the checkpoint handler has no checkpoints to test, or if none of the
        checkpoints produced a result.
    """
    checkpoints = checkpoint_handler.get_checkpoints_to_test()
    if not checkpoints:
        raise ValueError("There were no checkpoints available for model testing.")

    mean_dice_per_epoch: Dict[int, float] = {}
    for checkpoint in checkpoints:
        epoch = checkpoint.epoch
        results_folder = config.outputs_folder / get_epoch_results_path(epoch, data_split, model_proc)
        # Keep a copy of the datasets.csv that was used next to the results.
        config.write_dataset_files(root=results_folder)
        split_description = "epoch {} {} set".format(epoch, data_split.value)
        # The per-epoch test works on a deep copy of the config, leaving the caller's object untouched.
        dice_per_image = segmentation_model_test_epoch(config=copy.deepcopy(config),
                                                       data_split=data_split,
                                                       checkpoint_paths=checkpoint.checkpoint_paths,
                                                       results_folder=results_folder,
                                                       epoch_and_split=split_description)
        if dice_per_image is None:
            logging.warning("There is no checkpoint file for epoch {}".format(epoch))
            continue

        # An empty list of per-image scores is reported as a mean Dice of 0.
        mean_dice: float = np.mean(dice_per_image) if len(dice_per_image) > 0 else 0
        mean_dice_per_epoch[epoch] = mean_dice
        logging.info("Epoch: {:3} | Mean Dice: {:4f}".format(epoch, mean_dice))
        if model_proc == ModelProcessing.ENSEMBLE_CREATION:
            # For the upload, we want the path without the "OTHER_RUNS/ENSEMBLE" prefix.
            upload_name = str(get_epoch_results_path(epoch, data_split, ModelProcessing.DEFAULT))
            PARENT_RUN_CONTEXT.upload_folder(name=upload_name, path=str(results_folder))

    if not mean_dice_per_epoch:
        raise ValueError("There was no single checkpoint file available for model testing.")
    return InferenceMetricsForSegmentation(data_split=data_split, epochs=mean_dice_per_epoch)
def create_ensemble_model(self) -> None:
    """
    Call MLRunner again after training cross-validation models, to create an ensemble model from them.
    After inference, the cross validation results are plotted and uploaded, a report is generated, and the
    temporary folders created along the way are removed.
    """
    # Import only here in case of dependency issues in reduced environment
    from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
    # Adjust parameters
    self.azure_config.hyperdrive = False
    self.model_config.number_of_cross_validation_splits = 0
    self.model_config.is_train = False

    with logging_section("Downloading checkpoints from sibling runs"):
        checkpoint_handler = CheckpointHandler(model_config=self.model_config,
                                               azure_config=self.azure_config,
                                               project_root=self.project_root,
                                               run_context=PARENT_RUN_CONTEXT)
        checkpoint_handler.discover_and_download_checkpoint_from_sibling_runs(
            output_subdir_name=OTHER_RUNS_SUBDIR_NAME)

    ml_runner = self.create_ml_runner()
    best_epoch = ml_runner.run_inference_and_register_model(checkpoint_handler=checkpoint_handler,
                                                            model_proc=ModelProcessing.ENSEMBLE_CREATION)

    crossval_dir = self.plot_cross_validation_and_upload_results()
    Runner.generate_report(self.model_config, best_epoch, ModelProcessing.ENSEMBLE_CREATION)
    # CrossValResults should have been uploaded to the parent run, so we don't need it here.
    remove_file_or_directory(crossval_dir)

    # We can also remove OTHER_RUNS under the root, as it is no longer useful and only contains copies of files
    # available elsewhere. However, first we need to upload relevant parts of OTHER_RUNS/ENSEMBLE.
    other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
    ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
    if PARENT_RUN_CONTEXT is not None:
        if not ensemble_dir.exists():
            logging.warning(f"Directory not found for upload: {ensemble_dir}")
        else:
            # Only keep baseline Wilcoxon results and scatterplots and reports
            names_to_keep = [BASELINE_WILCOXON_RESULTS_FILE, SCATTERPLOTS_SUBDIR_NAME, REPORT_HTML, REPORT_IPYNB]
            for entry in ensemble_dir.glob("*"):
                if entry.name not in names_to_keep:
                    remove_file_or_directory(entry)
            PARENT_RUN_CONTEXT.upload_folder(name=BASELINE_COMPARISONS_FOLDER, path=str(ensemble_dir))
    remove_file_or_directory(other_runs_dir)
def segmentation_model_test(config: SegmentationModelBase,
                            execution_mode: ModelExecutionMode,
                            checkpoint_paths: List[Path],
                            model_proc: ModelProcessing = ModelProcessing.DEFAULT) -> InferenceMetricsForSegmentation:
    """
    The main testing loop for segmentation models.
    It writes the dataset files to the results folder, tests the model built from the given checkpoints via
    segmentation_model_test_epoch, and reports the mean Dice score across all images.

    :param config: The arguments object which has a valid random seed attribute.
    :param execution_mode: Indicates which of the 3 sets (training, test, or validation) is being processed.
    :param checkpoint_paths: The checkpoint files that are handed to segmentation_model_test_epoch.
    :param model_proc: Whether we are testing an ensemble or single model.
    :return: InferenceMetric object that holds the execution mode and the mean Dice score. The mean is
        reported as 0 if the list of per-image scores is empty.
    :raises ValueError: If segmentation_model_test_epoch returns None instead of per-image scores.
    """
    epoch_results_folder = config.outputs_folder / get_best_epoch_results_path(execution_mode, model_proc)
    # save the datasets.csv used
    config.write_dataset_files(root=epoch_results_folder)
    epoch_and_split = f"{execution_mode.value} set"
    # The test run works on a deep copy of the config, leaving the caller's object untouched.
    epoch_dice_per_image = segmentation_model_test_epoch(config=copy.deepcopy(config),
                                                         execution_mode=execution_mode,
                                                         checkpoint_paths=checkpoint_paths,
                                                         results_folder=epoch_results_folder,
                                                         epoch_and_split=epoch_and_split)
    if epoch_dice_per_image is None:
        raise ValueError("There was no single checkpoint file available for model testing.")

    epoch_average_dice: float = np.mean(epoch_dice_per_image) if len(epoch_dice_per_image) > 0 else 0
    logging.info(f"Mean Dice: {epoch_average_dice:4f}")
    if model_proc == ModelProcessing.ENSEMBLE_CREATION:
        # For the upload, we want the path without the "OTHER_RUNS/ENSEMBLE" prefix.
        name = str(get_best_epoch_results_path(execution_mode, ModelProcessing.DEFAULT))
        PARENT_RUN_CONTEXT.upload_folder(name=name, path=str(epoch_results_folder))
    return InferenceMetricsForSegmentation(execution_mode=execution_mode, metrics=epoch_average_dice)
def create_ensemble_model(self) -> None:
    """
    Create an ensemble model from the results of the sibling runs of the present run. The present run here will
    be cross validation child run 0.
    After inference, the cross validation results are plotted and uploaded, a report is generated, and the
    temporary folders created along the way are removed.
    """
    assert PARENT_RUN_CONTEXT, "This function should only be called in a Hyperdrive run"
    with logging_section("Downloading checkpoints from sibling runs"):
        checkpoint_handler = CheckpointHandler(model_config=self.model_config,
                                               azure_config=self.azure_config,
                                               project_root=self.project_root,
                                               run_context=PARENT_RUN_CONTEXT)
        checkpoint_handler.download_checkpoints_from_hyperdrive_child_runs(PARENT_RUN_CONTEXT)

    self.run_inference_and_register_model(checkpoint_handler=checkpoint_handler,
                                          model_proc=ModelProcessing.ENSEMBLE_CREATION)

    crossval_dir = self.plot_cross_validation_and_upload_results()
    self.generate_report(ModelProcessing.ENSEMBLE_CREATION)
    # CrossValResults should have been uploaded to the parent run, so we don't need it here.
    remove_file_or_directory(crossval_dir)

    # We can also remove OTHER_RUNS under the root, as it is no longer useful and only contains copies of files
    # available elsewhere. However, first we need to upload relevant parts of OTHER_RUNS/ENSEMBLE.
    other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
    ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
    if PARENT_RUN_CONTEXT is not None:
        if not ensemble_dir.exists():
            logging.warning(f"Directory not found for upload: {ensemble_dir}")
        else:
            # Only keep baseline Wilcoxon results and scatterplots and reports
            names_to_keep = [BASELINE_WILCOXON_RESULTS_FILE, SCATTERPLOTS_SUBDIR_NAME, REPORT_HTML, REPORT_IPYNB]
            for entry in ensemble_dir.glob("*"):
                if entry.name not in names_to_keep:
                    remove_file_or_directory(entry)
            PARENT_RUN_CONTEXT.upload_folder(name=BASELINE_COMPARISONS_FOLDER, path=str(ensemble_dir))
    remove_file_or_directory(other_runs_dir)