Exemplo n.º 1
0
def test_save_outliers(test_config: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """
    Check the outlier report written for the validation split against stored reference files.

    The check runs twice: once on the metrics as loaded, and once after the institution and
    series columns have been dropped from the metrics.

    :param test_config: Plotting configuration; its output directory and outlier range are overwritten here.
    :param test_output_dirs: Provides the root directory that the report is written into.
    """
    test_config.outputs_directory = test_output_dirs.root_dir
    test_config.outlier_range = 0
    assert test_config.run_recovery_id
    val_mode = ModelExecutionMode.VAL
    report_name = f"{val_mode.value}_outliers.txt"

    # Pass 1: metrics exactly as returned by _get_metrics_df.
    full_metrics = {val_mode: _get_metrics_df(test_config.run_recovery_id, val_mode)}
    save_outliers(test_config, full_metrics, test_config.outputs_directory)
    assert_text_files_match(full_file=test_config.outputs_directory / report_name,
                            expected_file=full_ml_test_data_path(report_name))

    # Pass 2: the same metrics without the CSV_INSTITUTION_HEADER and CSV_SERIES_HEADER columns
    # (per the original note, these will be missing in institutions' environments).
    # errors="ignore" keeps the drop from failing when a column is already absent.
    reduced_df = _get_metrics_df(test_config.run_recovery_id, val_mode).drop(
        columns=[CSV_INSTITUTION_HEADER, CSV_SERIES_HEADER],
        errors="ignore")
    reduced_metrics = {val_mode: reduced_df}
    save_outliers(test_config, reduced_metrics, test_config.outputs_directory)
    # The report is read from the same output path as in pass 1; only the reference file differs.
    pruned_reference_name = f"{val_mode.value}_outliers_pruned.txt"
    assert_text_files_match(
        full_file=test_config.outputs_directory / report_name,
        expected_file=full_ml_test_data_path(pruned_reference_name))
Exemplo n.º 2
0
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that the outlier report written for the validation split equals the stored reference file.

    :param test_config_ensemble: Plotting configuration; its output directory and outlier range are overwritten here.
    :param test_output_dirs: Provides the root directory that the report is written into.
    """
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0

    val_mode = ModelExecutionMode.VAL
    metrics_per_split = {val_mode: _get_metrics_df(val_mode)}
    target_dir = test_config_ensemble.outputs_directory
    save_outliers(test_config_ensemble, metrics_per_split, target_dir)

    # The written report and the reference file share the same file name.
    outlier_filename = f"{val_mode.value}_outliers.txt"
    written_report = test_config_ensemble.outputs_directory / outlier_filename
    reference_report = full_ml_test_data_path(outlier_filename)
    assert_text_files_match(full_file=written_report, expected_file=reference_report)
Exemplo n.º 3
0
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: TestOutputDirectories) -> None:
    """
    Check that the outlier report written for the validation split is identical to the stored reference file.

    :param test_config_ensemble: Plotting configuration; its output directory and outlier range are overwritten here.
    :param test_output_dirs: Provides the root directory that the report is written into.
    """
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0

    val_mode = ModelExecutionMode.VAL
    metrics_per_split = {val_mode: _get_metrics_df(val_mode)}
    save_outliers(test_config_ensemble, metrics_per_split, Path(test_config_ensemble.outputs_directory))

    # Both the written report and the reference file use this name.
    outlier_filename = f"{val_mode.value}_outliers.txt"
    written_report = Path(test_config_ensemble.outputs_directory) / outlier_filename
    reference_report = Path(full_ml_test_data_path(outlier_filename))
    assert_file_contents_match_exactly(full_file=written_report,
                                       expected_file=reference_report)