Example #1
 def plot_cross_validation_and_upload_results(self) -> Path:
     from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
         plot_cross_validation, unroll_aggregate_metrics
     # Perform aggregation, now that all cross-validation splits are complete.
     plot_crossval_config = crossval_config_from_model_config(
         self.model_config)
     plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[
         RUN_RECOVERY_ID_KEY_NAME]
     plot_crossval_config.outputs_directory = self.model_config.outputs_folder
     plot_crossval_config.settings_yaml_file = self.yaml_config_file
     cross_val_results_root = plot_cross_validation(plot_crossval_config)
     if self.post_cross_validation_hook:
         self.post_cross_validation_hook(self.model_config,
                                         cross_val_results_root)
     # Upload the results to the parent run's outputs. Normally, we use blobxfer for that, but here we
     # want to ensure that the files are visible inside the AzureML UI.
     PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER,
                                      path=str(cross_val_results_root))
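     # For scalar models, also log the unrolled aggregate metrics to the Hyperdrive parent run, so
     # that they appear as regular metrics in the AzureML UI.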
     if self.model_config.is_scalar_model:
         try:
             aggregates = pd.read_csv(cross_val_results_root /
                                      METRICS_AGGREGATES_FILE)
             unrolled_aggregate_metrics = unroll_aggregate_metrics(
                 aggregates)
             for m in unrolled_aggregate_metrics:
                 PARENT_RUN_CONTEXT.log(m.metric_name, m.metric_value)
         except Exception as ex:
             print_exception(
                 ex,
                 "Unable to log metrics to Hyperdrive parent run.",
                 logger_fn=logging.warning)
     return cross_val_results_root
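The parent-run upload and logging pattern used above, shown in isolation (a minimal sketch against the public azureml-core Run API; the folder name, metric name, and paths are placeholders, not taken from the original):

# Minimal sketch: upload a folder and log a metric to the Hyperdrive parent run.
from azureml.core import Run

run = Run.get_context()
parent_run = run.parent  # None when this run is not a Hyperdrive child
if parent_run is not None:
    # upload_folder makes the files browsable in the AzureML UI under the parent run
    parent_run.upload_folder(name="CrossValResults", path="outputs/crossval")
    parent_run.log("Dice", 0.87)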
Example #2
def segmentation_model_test(
    config: SegmentationModelBase,
    data_split: ModelExecutionMode,
    checkpoint_handler: CheckpointHandler,
    model_proc: ModelProcessing = ModelProcessing.DEFAULT
) -> InferenceMetricsForSegmentation:
    """
    The main testing loop for segmentation models.
    It loads the model and datasets, then proceeds to test the model for all requested checkpoints.
    :param config: The model configuration; it must have a valid random seed attribute.
    :param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed.
    :param checkpoint_handler: Checkpoint handler object to find checkpoint paths for model initialization.
    :param model_proc: Whether we are testing an ensemble or a single model.
    :return: An InferenceMetricsForSegmentation object that contains metrics for all of the checkpoint epochs.
    """
    results: Dict[int, float] = {}
    checkpoints_to_test = checkpoint_handler.get_checkpoints_to_test()

    if not checkpoints_to_test:
        raise ValueError(
            "There were no checkpoints available for model testing.")

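    # Run a full inference pass for each recovered set of checkpoints (one set per epoch).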
    for checkpoint_paths_and_epoch in checkpoints_to_test:
        epoch = checkpoint_paths_and_epoch.epoch
        epoch_results_folder = config.outputs_folder / get_epoch_results_path(
            epoch, data_split, model_proc)
        # Save the datasets.csv file that was used.
        config.write_dataset_files(root=epoch_results_folder)
        epoch_and_split = "epoch {} {} set".format(epoch, data_split.value)
        epoch_dice_per_image = segmentation_model_test_epoch(
            config=copy.deepcopy(config),
            data_split=data_split,
            checkpoint_paths=checkpoint_paths_and_epoch.checkpoint_paths,
            results_folder=epoch_results_folder,
            epoch_and_split=epoch_and_split)
        if epoch_dice_per_image is None:
            logging.warning(
                "There is no checkpoint file for epoch {}".format(epoch))
        else:
            epoch_average_dice: float = np.mean(
                epoch_dice_per_image) if len(epoch_dice_per_image) > 0 else 0
            results[epoch] = epoch_average_dice
            logging.info("Epoch: {:3} | Mean Dice: {:4f}".format(
                epoch, epoch_average_dice))
            if model_proc == ModelProcessing.ENSEMBLE_CREATION:
                # For the upload, we want the path without the "OTHER_RUNS/ENSEMBLE" prefix.
                name = str(
                    get_epoch_results_path(epoch, data_split,
                                           ModelProcessing.DEFAULT))
                PARENT_RUN_CONTEXT.upload_folder(
                    name=name, path=str(epoch_results_folder))
    if len(results) == 0:
        raise ValueError(
            "There was no single checkpoint file available for model testing.")
    return InferenceMetricsForSegmentation(data_split=data_split,
                                           epochs=results)
Example #3
 def log_to_azure(self, label: str, metric: float) -> None:
     """
     Logs a metric as a key/value pair to AzureML.
     """
     if not is_offline_run_context(RUN_CONTEXT):
         metric_name = self.logging_prefix + label
         RUN_CONTEXT.log(metric_name, metric)
         # When running in a cross validation setting, log all metrics to the hyperdrive parent run too,
         # so that we can easily overlay graphs across runs.
         if self.log_to_parent_run and PARENT_RUN_CONTEXT:
             if self.cross_validation_split_index > DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
                 PARENT_RUN_CONTEXT.log(f"{metric_name}_Split{self.cross_validation_split_index}",
                                        metric)
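A hypothetical call site (the `logger` object, its constructor arguments, and the metric value below are illustrative assumptions, not part of the original):

# Assumes a metrics-logger object exposing log_to_azure() as above, constructed with
# logging_prefix="val/", log_to_parent_run=True and cross_validation_split_index=0:
logger.log_to_azure(label="Dice", metric=0.87)
# -> logs "val/Dice" on the current run and, since the split index is non-default,
#    "val/Dice_Split0" on the Hyperdrive parent run.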
Example #4
    def create_ensemble_model(self) -> None:
        """
        Call MLRunner again after training cross-validation models, to create an ensemble model from them.
        """
        # Import here rather than at the top, in case of dependency issues in a reduced environment.
        from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler
        # Adjust parameters: the nested runner must not use Hyperdrive, cross-validation, or training.
        self.azure_config.hyperdrive = False
        self.model_config.number_of_cross_validation_splits = 0
        self.model_config.is_train = False

        with logging_section("Downloading checkpoints from sibling runs"):
            checkpoint_handler = CheckpointHandler(
                model_config=self.model_config,
                azure_config=self.azure_config,
                project_root=self.project_root,
                run_context=PARENT_RUN_CONTEXT)
            checkpoint_handler.discover_and_download_checkpoint_from_sibling_runs(
                output_subdir_name=OTHER_RUNS_SUBDIR_NAME)

        best_epoch = self.create_ml_runner().run_inference_and_register_model(
            checkpoint_handler=checkpoint_handler,
            model_proc=ModelProcessing.ENSEMBLE_CREATION)

        crossval_dir = self.plot_cross_validation_and_upload_results()
        Runner.generate_report(self.model_config, best_epoch,
                               ModelProcessing.ENSEMBLE_CREATION)
        # CrossValResults should have been uploaded to the parent run, so we don't need it here.
        remove_file_or_directory(crossval_dir)
        # We can also remove OTHER_RUNS under the root, as it is no longer useful and only contains copies of files
        # available elsewhere. However, first we need to upload relevant parts of OTHER_RUNS/ENSEMBLE.
        other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
        other_runs_ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
        if PARENT_RUN_CONTEXT is not None:
            if other_runs_ensemble_dir.exists():
                # Keep only the baseline Wilcoxon results, scatterplots, and reports.
                for subdir in other_runs_ensemble_dir.glob("*"):
                    if subdir.name not in [
                            BASELINE_WILCOXON_RESULTS_FILE,
                            SCATTERPLOTS_SUBDIR_NAME, REPORT_HTML, REPORT_IPYNB
                    ]:
                        remove_file_or_directory(subdir)
                PARENT_RUN_CONTEXT.upload_folder(
                    name=BASELINE_COMPARISONS_FOLDER,
                    path=str(other_runs_ensemble_dir))
            else:
                logging.warning(
                    f"Directory not found for upload: {other_runs_ensemble_dir}"
                )
        remove_file_or_directory(other_runs_dir)
Example #5
 def set_run_tags_from_parent(self) -> None:
     """
     Set metadata tags on the present run, copying most of them from the Hyperdrive parent run.
     """
     assert PARENT_RUN_CONTEXT, "This function should only be called in a Hyperdrive run."
     run_tags_parent = PARENT_RUN_CONTEXT.get_tags()
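     # Tags that are copied verbatim from the Hyperdrive parent run to this child run.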
     tags_to_copy = [
         "tag", "model_name", "execution_mode", "recovered_from",
         "friendly_name", "build_number", "build_user", "source_repository",
         "source_branch", "source_id", "source_message", "source_author",
         "source_dirty", RUN_RECOVERY_FROM_ID_KEY_NAME
     ]
     new_tags = {tag: run_tags_parent.get(tag, "") for tag in tags_to_copy}
     new_tags[RUN_RECOVERY_ID_KEY_NAME] = create_run_recovery_id(
         run=RUN_CONTEXT)
     new_tags[CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY] = str(
         self.model_config.cross_validation_split_index)
     new_tags[EFFECTIVE_RANDOM_SEED_KEY_NAME] = str(
         self.model_config.get_effective_random_seed())
     if isinstance(self.model_config, ScalarModelBase):
         new_tags[NUMBER_OF_CROSS_VALIDATION_SPLITS_PER_FOLD_KEY_NAME] = str(
             self.model_config.number_of_cross_validation_splits_per_fold)
         new_tags[CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY] = str(
             self.model_config.cross_validation_sub_fold_split_index)
     RUN_CONTEXT.set_tags(new_tags)
Example #6
def segmentation_model_test(
    config: SegmentationModelBase,
    execution_mode: ModelExecutionMode,
    checkpoint_paths: List[Path],
    model_proc: ModelProcessing = ModelProcessing.DEFAULT
) -> InferenceMetricsForSegmentation:
    """
    The main testing loop for segmentation models.
    It loads the model and datasets, then proceeds to test the model for all requested checkpoints.
    :param config: The model configuration; it must have a valid random seed attribute.
    :param execution_mode: Indicates which of the 3 sets (training, test, or validation) is being processed.
    :param checkpoint_paths: Checkpoint paths to initialize the model from.
    :param model_proc: Whether we are testing an ensemble or a single model.
    :return: An InferenceMetricsForSegmentation object that contains the metrics for the tested checkpoints.
    """

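    # Unlike the per-epoch loop in Example #2, this variant runs inference once, for the
    # best-epoch checkpoints only.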
    epoch_results_folder = config.outputs_folder / get_best_epoch_results_path(
        execution_mode, model_proc)
    # Save the datasets.csv file that was used.
    config.write_dataset_files(root=epoch_results_folder)
    epoch_and_split = f"{execution_mode.value} set"
    epoch_dice_per_image = segmentation_model_test_epoch(
        config=copy.deepcopy(config),
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_paths,
        results_folder=epoch_results_folder,
        epoch_and_split=epoch_and_split)
    if epoch_dice_per_image is None:
        raise ValueError(
            "There was no single checkpoint file available for model testing.")
    else:
        epoch_average_dice: float = np.mean(
            epoch_dice_per_image) if len(epoch_dice_per_image) > 0 else 0
        result = epoch_average_dice
        logging.info(f"Mean Dice: {epoch_average_dice:4f}")
        if model_proc == ModelProcessing.ENSEMBLE_CREATION:
            # For the upload, we want the path without the "OTHER_RUNS/ENSEMBLE" prefix.
            name = str(
                get_best_epoch_results_path(execution_mode,
                                            ModelProcessing.DEFAULT))
            PARENT_RUN_CONTEXT.upload_folder(name=name,
                                             path=str(epoch_results_folder))
    return InferenceMetricsForSegmentation(execution_mode=execution_mode,
                                           metrics=result)
Example #7
    def create_ensemble_model(self) -> None:
        """
        Create an ensemble model from the results of the sibling runs of the present run. The present run
        will be cross-validation child run 0.
        """
        assert PARENT_RUN_CONTEXT, "This function should only be called in a Hyperdrive run"
        with logging_section("Downloading checkpoints from sibling runs"):
            checkpoint_handler = CheckpointHandler(
                model_config=self.model_config,
                azure_config=self.azure_config,
                project_root=self.project_root,
                run_context=PARENT_RUN_CONTEXT)
            checkpoint_handler.download_checkpoints_from_hyperdrive_child_runs(
                PARENT_RUN_CONTEXT)

        self.run_inference_and_register_model(
            checkpoint_handler=checkpoint_handler,
            model_proc=ModelProcessing.ENSEMBLE_CREATION)

        crossval_dir = self.plot_cross_validation_and_upload_results()
        self.generate_report(ModelProcessing.ENSEMBLE_CREATION)
        # CrossValResults should have been uploaded to the parent run, so we don't need it here.
        remove_file_or_directory(crossval_dir)
        # We can also remove OTHER_RUNS under the root, as it is no longer useful and only contains copies of files
        # available elsewhere. However, first we need to upload relevant parts of OTHER_RUNS/ENSEMBLE.
        other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
        other_runs_ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
        if PARENT_RUN_CONTEXT is not None:
            if other_runs_ensemble_dir.exists():
                # Keep only the baseline Wilcoxon results, scatterplots, and reports.
                for subdir in other_runs_ensemble_dir.glob("*"):
                    if subdir.name not in [
                            BASELINE_WILCOXON_RESULTS_FILE,
                            SCATTERPLOTS_SUBDIR_NAME, REPORT_HTML, REPORT_IPYNB
                    ]:
                        remove_file_or_directory(subdir)
                PARENT_RUN_CONTEXT.upload_folder(
                    name=BASELINE_COMPARISONS_FOLDER,
                    path=str(other_runs_ensemble_dir))
            else:
                logging.warning(
                    f"Directory not found for upload: {other_runs_ensemble_dir}"
                )
        remove_file_or_directory(other_runs_dir)