def test_unroll_aggregates() -> None:
    # This is an output file of a CV run on a classification model, shuffled such that epochs are not in the
    # right order.
    file = io.StringIO("""area_under_roc_curve,area_under_pr_curve,cross_entropy,subject_count,data_split,epoch
1.00000,1.00000,0.70290,3,Val,4
1.00000,1.00000,0.70339,3,Val,1
1.00000,1.00000,0.70323,3,Val,2
1.00000,1.00000,0.70306,3,Val,3
""")
    df = pd.read_csv(file)
    unrolled = unroll_aggregate_metrics(df)
    expected_metrics = {LoggingColumns.CrossEntropy.value,
                        LoggingColumns.AreaUnderPRCurve.value,
                        LoggingColumns.AreaUnderRocCurve.value,
                        LoggingColumns.SubjectCount.value}
    expected_epochs = set(range(1, 5))
    assert len(unrolled) == len(expected_epochs) * len(expected_metrics)
    actual_metrics = set(m.metric_name for m in unrolled)
    assert actual_metrics == expected_metrics
    actual_epochs = set(m.epoch for m in unrolled)
    assert actual_epochs == expected_epochs
    assert unrolled[0] == EpochMetricValues(1, LoggingColumns.AreaUnderPRCurve.value, 1.0)
    assert unrolled[-2] == EpochMetricValues(4, LoggingColumns.CrossEntropy.value, 0.7029)
    assert unrolled[-1] == EpochMetricValues(4, LoggingColumns.SubjectCount.value, 3)
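# The test above exercises EpochMetricValues and unroll_aggregate_metrics from
# InnerEye.ML.visualizers.plot_cross_validation. Below is a minimal sketch of what such a helper
# could look like, inferred only from the assertions in the test; the actual implementation in the
# repository may differ. The literal column names "data_split" and "epoch" are taken from the CSV
# header used in the test.
from typing import List, NamedTuple

import pandas as pd


class EpochMetricValues(NamedTuple):
    epoch: int
    metric_name: str
    metric_value: float


def unroll_aggregate_metrics(df: pd.DataFrame) -> List[EpochMetricValues]:
    """Turn a wide aggregate-metrics frame into one (epoch, metric, value) entry per cell,
    keeping only the validation split and sorting by epoch and then metric name."""
    val_only = df[df["data_split"] == "Val"]
    metric_columns = [c for c in val_only.columns if c not in ("data_split", "epoch")]
    result = [EpochMetricValues(int(row["epoch"]), c, row[c])
              for _, row in val_only.iterrows()
              for c in metric_columns]
    return sorted(result, key=lambda m: (m.epoch, m.metric_name))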
def plot_cross_validation_and_upload_results(self) -> Path:
    from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
        plot_cross_validation, unroll_aggregate_metrics
    # Perform aggregation now that all cross-validation splits are ready.
    plot_crossval_config = crossval_config_from_model_config(self.model_config)
    plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
    plot_crossval_config.outputs_directory = self.model_config.outputs_folder
    plot_crossval_config.settings_yaml_file = self.yaml_config_file
    cross_val_results_root = plot_cross_validation(plot_crossval_config)
    if self.post_cross_validation_hook:
        self.post_cross_validation_hook(self.model_config, cross_val_results_root)
    # Upload results to the parent run's outputs. Normally we would use blobxfer for that, but here we want
    # to ensure that the files are visible inside the AzureML UI.
    PARENT_RUN_CONTEXT.upload_folder(name=CROSSVAL_RESULTS_FOLDER, path=str(cross_val_results_root))
    if self.model_config.is_scalar_model:
        try:
            aggregates = pd.read_csv(cross_val_results_root / METRICS_AGGREGATES_FILE)
            unrolled_aggregate_metrics = unroll_aggregate_metrics(aggregates)
            for m in unrolled_aggregate_metrics:
                PARENT_RUN_CONTEXT.log(m.metric_name, m.metric_value)
        except Exception as ex:
            print_exception(ex, "Unable to log metrics to Hyperdrive parent run.", logger_fn=logging.warning)
    return cross_val_results_root
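# A post_cross_validation_hook, if configured, receives the model configuration and the folder that
# plot_cross_validation wrote its outputs to. Below is a minimal sketch of such a hook; the function
# name is hypothetical, and only the (model_config, cross_val_results_root) signature is taken from
# the call above. METRICS_AGGREGATES_FILE is the same constant used elsewhere in this code.
import logging
from pathlib import Path


def example_post_cross_validation_hook(model_config, cross_val_results_root: Path) -> None:
    """Runs after the cross-validation aggregation; here it only logs where the aggregates file
    ended up, but any custom post-processing could be plugged in this way."""
    aggregates = cross_val_results_root / METRICS_AGGREGATES_FILE
    logging.info("Cross-validation aggregates for %s: %s (exists: %s)",
                 type(model_config).__name__, aggregates, aggregates.is_file())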
def _check_offline_cross_validation_output_files(train_config: ScalarModelBase) -> None:
    metrics: Dict[ModelExecutionMode, List[pd.DataFrame]] = dict()
    root = Path(train_config.file_system_config.outputs_folder)
    for x in range(train_config.get_total_number_of_cross_validation_runs()):
        expected_outputs_folder = root / str(x)
        assert expected_outputs_folder.exists()
        for m in [ModelExecutionMode.TRAIN, ModelExecutionMode.VAL]:
            metrics_path = expected_outputs_folder / m.value / METRICS_FILE_NAME
            assert metrics_path.exists()
            split_metrics = pd.read_csv(metrics_path)
            if m in metrics:
                # Check that the metrics of any two folds are not identical.
                assert not any([split_metrics.equals(other) for other in metrics[m]])
            metrics.setdefault(m, []).append(split_metrics)
    if train_config.perform_cross_validation:
        # Test that the aggregates are as expected.
        aggregate_metrics_path = root / CROSSVAL_RESULTS_FOLDER / METRICS_AGGREGATES_FILE
        assert aggregate_metrics_path.is_file()
        # Since we aggregate the outputs of each of the child folds, we need to compare
        # the outputs w.r.t. the parent folds.
        child_folds = train_config.number_of_cross_validation_splits_per_fold
        if train_config.perform_sub_fold_cross_validation:
            train_config.number_of_cross_validation_splits_per_fold = 0
        _dataset_splits = train_config.get_dataset_splits()
        train_config.number_of_cross_validation_splits_per_fold = child_folds

        _val_dataset_split_count = len(_dataset_splits.val[train_config.subject_column].unique()) + \
                                   len(_dataset_splits.train[train_config.subject_column].unique())
        _aggregates_csv = pd.read_csv(aggregate_metrics_path)
        _counts_for_splits = list(_aggregates_csv[LoggingColumns.SubjectCount.value])
        assert all([x == _val_dataset_split_count for x in _counts_for_splits])
        _epochs = list(_aggregates_csv[LoggingColumns.Epoch.value])
        # Each epoch is recorded twice: once for the training split and once for the validation split.
        assert len(_epochs) == train_config.num_epochs * 2
        assert all([x + 1 in _epochs for x in list(range(train_config.num_epochs)) * 2])
        # Only the validation mode is kept for unrolled aggregates.
        unrolled = unroll_aggregate_metrics(_aggregates_csv)
        if train_config.is_classification_model:
            expected_metrics = {LoggingColumns.CrossEntropy.value,
                                LoggingColumns.AreaUnderPRCurve.value,
                                LoggingColumns.AreaUnderRocCurve.value,
                                LoggingColumns.FalseNegativeRateAtOptimalThreshold.value,
                                LoggingColumns.FalsePositiveRateAtOptimalThreshold.value,
                                LoggingColumns.AccuracyAtOptimalThreshold.value,
                                LoggingColumns.OptimalThreshold.value,
                                LoggingColumns.AccuracyAtThreshold05.value}
        else:
            expected_metrics = {LoggingColumns.MeanAbsoluteError.value,
                                LoggingColumns.MeanSquaredError.value,
                                LoggingColumns.R2Score.value}
        expected_metrics = expected_metrics.union({LoggingColumns.SubjectCount.value})
        assert len(unrolled) == train_config.num_epochs * len(expected_metrics)
        actual_metrics = set(m.metric_name for m in unrolled)
        assert actual_metrics == expected_metrics
        actual_epochs = set(m.epoch for m in unrolled)
        assert actual_epochs == set(_epochs)
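# A toy aggregates frame consistent with the assertions above, assuming num_epochs == 2 and a
# parent split containing 9 unique subjects across train and val (both numbers are illustrative
# assumptions). Each epoch appears once per data split, so the epoch column has num_epochs * 2
# entries, the subject count is the same on every row, and only the Val rows survive
# unroll_aggregate_metrics.
import pandas as pd

toy_aggregates = pd.DataFrame({
    "cross_entropy": [0.71, 0.70, 0.69, 0.68],
    "subject_count": [9, 9, 9, 9],
    "data_split": ["Train", "Train", "Val", "Val"],
    "epoch": [1, 2, 1, 2],
})
assert len(toy_aggregates["epoch"]) == 2 * 2  # num_epochs rows per split, two splits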