def test_unroll_aggregates() -> None:
    # Aggregate metrics output of a cross-validation run on a classification model, with the rows
    # shuffled so that the epochs are out of order.
    file = io.StringIO(
        """area_under_roc_curve,area_under_pr_curve,cross_entropy,subject_count,data_split,epoch
1.00000,1.00000,0.70290,3,Val,4
1.00000,1.00000,0.70339,3,Val,1
1.00000,1.00000,0.70323,3,Val,2
1.00000,1.00000,0.70306,3,Val,3
""")
    df = pd.read_csv(file)
    unrolled = unroll_aggregate_metrics(df)
    expected_metrics = {
        LoggingColumns.CrossEntropy.value,
        LoggingColumns.AreaUnderPRCurve.value,
        LoggingColumns.AreaUnderRocCurve.value,
        LoggingColumns.SubjectCount.value
    }
    expected_epochs = set(range(1, 5))
    assert len(unrolled) == len(expected_epochs) * len(expected_metrics)
    actual_metrics = set(m.metric_name for m in unrolled)
    assert actual_metrics == expected_metrics
    actual_epochs = set(m.epoch for m in unrolled)
    assert actual_epochs == expected_epochs
    assert unrolled[0] == EpochMetricValues(1, LoggingColumns.AreaUnderPRCurve.value, 1.0)
    assert unrolled[-2] == EpochMetricValues(4, LoggingColumns.CrossEntropy.value, 0.7029)
    assert unrolled[-1] == EpochMetricValues(4, LoggingColumns.SubjectCount.value, 3)
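For readers unfamiliar with unroll_aggregate_metrics, the behaviour the test above relies on can be approximated as: keep only the Val rows of the aggregates table and emit one (epoch, metric_name, metric_value) record per epoch and metric, ordered by epoch and then metric name. The sketch below is a simplified, hypothetical stand-in under those assumptions, not the InnerEye implementation (the real EpochMetricValues and unroll_aggregate_metrics live in InnerEye.ML.visualizers.plot_cross_validation).

# Simplified, hypothetical stand-ins for illustration only; not the InnerEye implementation.
from dataclasses import dataclass
from typing import List

import pandas as pd


@dataclass(frozen=True)
class EpochMetricValuesSketch:
    epoch: int
    metric_name: str
    metric_value: float


def unroll_aggregate_metrics_sketch(df: pd.DataFrame) -> List[EpochMetricValuesSketch]:
    # Keep only the validation rows, then emit one record per (epoch, metric) pair,
    # ordered by epoch and metric name - the ordering the assertions above rely on.
    non_metric_columns = {"data_split", "epoch"}
    val_rows = df[df["data_split"] == "Val"]
    metric_columns = sorted(c for c in df.columns if c not in non_metric_columns)
    unrolled = []
    for epoch in sorted(val_rows["epoch"].unique()):
        row = val_rows[val_rows["epoch"] == epoch].iloc[0]
        for metric in metric_columns:
            unrolled.append(EpochMetricValuesSketch(int(epoch), metric, float(row[metric])))
    return unrolled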
Example #2
def plot_cross_validation_and_upload_results(self) -> Path:
    from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
        plot_cross_validation, unroll_aggregate_metrics
    # perform aggregation as cross val splits are now ready
    plot_crossval_config = crossval_config_from_model_config(self.model_config)
    plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
    plot_crossval_config.outputs_directory = self.model_config.outputs_folder
    plot_crossval_config.settings_yaml_file = self.yaml_config_file
    cross_val_results_root = plot_cross_validation(plot_crossval_config)
    if self.post_cross_validation_hook:
        self.post_cross_validation_hook(self.model_config, cross_val_results_root)
    # upload results to the parent run's outputs. Normally, we use blobxfer for that, but here we want
    # to ensure that the files are visible inside the AzureML UI.
    PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER,
                                     path=str(cross_val_results_root))
    if self.model_config.is_scalar_model:
        try:
            aggregates = pd.read_csv(cross_val_results_root / METRICS_AGGREGATES_FILE)
            unrolled_aggregate_metrics = unroll_aggregate_metrics(aggregates)
            for m in unrolled_aggregate_metrics:
                PARENT_RUN_CONTEXT.log(m.metric_name, m.metric_value)
        except Exception as ex:
            print_exception(ex,
                            "Unable to log metrics to Hyperdrive parent run.",
                            logger_fn=logging.warning)
    return cross_val_results_root
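The metric-logging loop above can be exercised without an AzureML workspace by swapping PARENT_RUN_CONTEXT for a small test double that records calls. The stub below is a hypothetical test helper, not part of InnerEye; it mirrors only the three members the method uses (tags, upload_folder and log).

# Hypothetical test double for PARENT_RUN_CONTEXT (normally an AzureML Run object);
# it mirrors only the members used above: tags, upload_folder() and log().
from typing import Any, Dict, List, Tuple


class FakeParentRunContext:
    def __init__(self, tags: Dict[str, str]) -> None:
        self.tags = tags
        self.logged_metrics: List[Tuple[str, Any]] = []
        self.uploaded_folders: Dict[str, str] = {}

    def log(self, name: str, value: Any) -> None:
        # AzureML's Run.log turns repeated calls with the same name into a series;
        # here we simply record the calls so a test can assert on them.
        self.logged_metrics.append((name, value))

    def upload_folder(self, name: str, path: str) -> None:
        self.uploaded_folders[name] = path

A test could monkeypatch the module-level PARENT_RUN_CONTEXT with this stub and then assert that one log call was made per unrolled metric and that the cross-validation results folder was uploaded once.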
def _check_offline_cross_validation_output_files(
        train_config: ScalarModelBase) -> None:
    metrics: Dict[ModelExecutionMode, List[pd.DataFrame]] = dict()
    root = Path(train_config.file_system_config.outputs_folder)
    for x in range(train_config.get_total_number_of_cross_validation_runs()):
        expected_outputs_folder = root / str(x)
        assert expected_outputs_folder.exists()
        for m in [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL]:
            metrics_path = expected_outputs_folder / m.value / METRICS_FILE_NAME
            assert metrics_path.exists()
            split_metrics = pd.read_csv(metrics_path)
            if m in metrics:
                # Check that the metrics of any two folds are not identical.
                assert not any([split_metrics.equals(x) for x in metrics[m]])
            metrics.setdefault(m, []).append(split_metrics)
    if train_config.perform_cross_validation:
        # test aggregates are as expected
        aggregate_metrics_path = root / CROSSVAL_RESULTS_FOLDER / METRICS_AGGREGATES_FILE
        assert aggregate_metrics_path.is_file()
        # Since we aggregate the outputs of each of the child folds, we need to compare
        # the outputs w.r.t. the parent folds. Temporarily disable sub-fold splitting so that
        # get_dataset_splits() returns the parent fold's split, then restore the setting.
        child_folds = train_config.number_of_cross_validation_splits_per_fold
        if train_config.perform_sub_fold_cross_validation:
            train_config.number_of_cross_validation_splits_per_fold = 0
        _dataset_splits = train_config.get_dataset_splits()
        train_config.number_of_cross_validation_splits_per_fold = child_folds

        # Expected subject count: all unique subjects in the parent fold's train and val splits combined.
        _val_dataset_split_count = \
            len(_dataset_splits.val[train_config.subject_column].unique()) + \
            len(_dataset_splits.train[train_config.subject_column].unique())
        _aggregates_csv = pd.read_csv(aggregate_metrics_path)
        _counts_for_splits = list(
            _aggregates_csv[LoggingColumns.SubjectCount.value])
        assert all([x == _val_dataset_split_count for x in _counts_for_splits])
        _epochs = list(_aggregates_csv[LoggingColumns.Epoch.value])
        # Each epoch is recorded twice: once for the training split and once for the validation split.
        assert len(_epochs) == train_config.num_epochs * 2
        assert all(x + 1 in _epochs for x in range(train_config.num_epochs))
        # Only the validation mode is kept for unrolled aggregates
        unrolled = unroll_aggregate_metrics(_aggregates_csv)
        if train_config.is_classification_model:
            expected_metrics = {
                LoggingColumns.CrossEntropy.value,
                LoggingColumns.AreaUnderPRCurve.value,
                LoggingColumns.AreaUnderRocCurve.value,
                LoggingColumns.FalseNegativeRateAtOptimalThreshold.value,
                LoggingColumns.FalsePositiveRateAtOptimalThreshold.value,
                LoggingColumns.AccuracyAtOptimalThreshold.value,
                LoggingColumns.OptimalThreshold.value,
                LoggingColumns.AccuracyAtThreshold05.value
            }
        else:
            expected_metrics = {
                LoggingColumns.MeanAbsoluteError.value,
                LoggingColumns.MeanSquaredError.value,
                LoggingColumns.R2Score.value
            }
        expected_metrics = expected_metrics.union(
            {LoggingColumns.SubjectCount.value})
        assert len(unrolled) == train_config.num_epochs * len(expected_metrics)
        actual_metrics = set(m.metric_name for m in unrolled)
        assert actual_metrics == expected_metrics
        actual_epochs = set(m.epoch for m in unrolled)
        assert actual_epochs == set(_epochs)
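To make the count assertions above concrete, here is the arithmetic for a hypothetical classification run with num_epochs = 4: the aggregates CSV holds one row per epoch per split (Train and Val), while the unrolled list holds one entry per epoch per expected metric (Val only, the eight classification metrics listed above plus SubjectCount).

# Worked example of the expected counts, assuming num_epochs = 4 (hypothetical value).
num_epochs = 4
rows_per_epoch = 2                                    # one Train row and one Val row in the aggregates CSV
classification_metrics = 8                            # the eight classification LoggingColumns listed above
expected_metric_count = classification_metrics + 1    # plus SubjectCount

assert num_epochs * rows_per_epoch == 8               # expected length of _epochs
assert num_epochs * expected_metric_count == 36       # expected length of unrolled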