def test_save_outliers(test_config: PlotCrossValidationConfig, test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that save_outliers writes the expected outlier file for the validation split.

    The check runs twice: once on the full metrics dataframe, and once after the
    institution and series columns have been dropped from it. Each result is compared
    against its own reference file from the test data folder.
    """
    mode = ModelExecutionMode.VAL
    test_config.outputs_directory = test_output_dirs.root_dir
    test_config.outlier_range = 0
    recovery_id = test_config.run_recovery_id
    assert recovery_id
    output_folder = test_config.outputs_directory

    # First pass: metrics with all columns present.
    metrics_full = {mode: _get_metrics_df(recovery_id, mode)}
    save_outliers(test_config, metrics_full, output_folder)
    filename = f"{mode.value}_outliers.txt"
    assert_text_files_match(
        full_file=output_folder / filename,
        expected_file=full_ml_test_data_path(filename),
    )

    # Second pass: repeat without the CSV_INSTITUTION_HEADER and CSV_SERIES_HEADER columns,
    # which will be missing in institutions' environments.
    optional_columns = [CSV_INSTITUTION_HEADER, CSV_SERIES_HEADER]
    metrics_pruned = {
        mode: _get_metrics_df(recovery_id, mode).drop(columns=optional_columns, errors="ignore")
    }
    save_outliers(test_config, metrics_pruned, output_folder)
    # The output is written under the same file name as before, but it is compared
    # against a separate reference file for the pruned case.
    test_data_filename = f"{mode.value}_outliers_pruned.txt"
    assert_text_files_match(
        full_file=output_folder / filename,
        expected_file=full_ml_test_data_path(test_data_filename),
    )
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that save_outliers writes the expected outlier file for the validation split.

    The file produced in the test output folder is compared against the reference
    file of the same name in the test data folder.
    """
    mode = ModelExecutionMode.VAL
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    output_folder = test_config_ensemble.outputs_directory

    metrics_by_mode = {mode: _get_metrics_df(mode)}
    save_outliers(test_config_ensemble, metrics_by_mode, output_folder)

    outlier_file_name = f"{mode.value}_outliers.txt"
    assert_text_files_match(
        full_file=output_folder / outlier_file_name,
        expected_file=full_ml_test_data_path(outlier_file_name),
    )
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: TestOutputDirectories) -> None:
    """
    Check that save_outliers writes the expected outlier file for the validation split.

    The configured output directory is wrapped in a Path before use, and the file
    written there is compared against the reference file of the same name in the
    test data folder.
    """
    mode = ModelExecutionMode.VAL
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0

    metrics_by_mode = {mode: _get_metrics_df(mode)}
    output_folder = Path(test_config_ensemble.outputs_directory)
    save_outliers(test_config_ensemble, metrics_by_mode, output_folder)

    outlier_file_name = f"{mode.value}_outliers.txt"
    actual_file = output_folder / outlier_file_name
    reference_file = Path(full_ml_test_data_path(outlier_file_name))
    assert_file_contents_match_exactly(full_file=actual_file, expected_file=reference_file)