def test_load_nonexistent_csv() -> None:
    """
    Check that an error is raised when attempting to load a non-existent CSV.
    """
    expected_cols = [CSV_PATH_HEADER, CSV_FEATURE_HEADER]
    with pytest.raises(Exception) as exc:
        load_csv(nonexistent_csv_path, expected_cols)
    assert str(exc.value) == "No CSV file exists at this location: {0}".format(nonexistent_csv_path)


def test_load_csv(csv_path: Path) -> None:
    """
    Check that the loaded dataframe has the expected number of rows and contains all expected columns.
    """
    df = load_csv(csv_path, expected_cols=known_df_cols)
    assert len(df) == 6
    assert all(x in list(df.columns) for x in known_df_cols)


def test_load_csv_no_expected_cols() -> None:
    """
    Check that an error is raised when the user neglects to provide a list of expected columns.
    """
    with pytest.raises(Exception):
        load_csv(known_csv_path, [])
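

# Illustrative sketch only, not part of the module under test: a minimal pytest fixture that would satisfy
# test_load_csv above, writing a CSV with six rows and the columns listed in known_df_cols. The fixture name
# "example_csv_path" and the use of pandas here are assumptions made purely for illustration.
@pytest.fixture
def example_csv_path(tmp_path: Path) -> Path:
    import pandas as pd  # local import keeps the sketch self-contained
    # Six rows, so that an assertion like len(df) == 6 in test_load_csv would hold
    df = pd.DataFrame({col: list(range(6)) for col in known_df_cols})
    path = tmp_path / "example.csv"
    df.to_csv(path, index=False)
    return path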


def load_predictions(run_type: SurfaceDistanceRunType, azure_config: AzureConfig, model_config: SegmentationModelBase,
                     execution_mode: ModelExecutionMode, extended_annotators: List[str], outlier_range: float
                     ) -> List[Segmentation]:
    """
    For each run type (IOV or outliers), instantiate a list of predicted Segmentations and return it.

    :param run_type: either "iov" or "outliers"
    :param azure_config: AzureConfig with the run information needed to locate and download run outputs
    :param model_config: SegmentationModelBase config of the model whose predictions are loaded
    :param execution_mode: ModelExecutionMode: either Test, Train or Val
    :param extended_annotators: list of annotators plus the model name to load segmentations for
    :param outlier_range: number of standard deviations from the mean below which a Dice score is considered an outlier
    :return: list of Segmentation objects (subject ID, structure name, segmentation path and Dice score or annotator)
    """
    predictions = []
    if run_type == SurfaceDistanceRunType.OUTLIERS:
        first_child_run = sd_util.get_first_child_run(azure_config)
        output_dir = sd_util.get_run_output_dir(azure_config, model_config)
        metrics_path = sd_util.get_metrics_path(azure_config, model_config)

        # Load the downloaded metrics CSV as a dataframe and determine the worst performing outliers for the Test run
        df = load_csv(metrics_path, [MetricsFileColumns.Patient.value, MetricsFileColumns.Structure.value])
        test_run_df = df[df['mode'] == execution_mode.value]
        worst_performers = get_worst_performing_outliers(test_run_df, outlier_range, MetricsFileColumns.Dice.value,
                                                         max_n_outliers=-50)

        for (subject_id, structure_name, dice_score, _) in worst_performers:
            subject_prefix = sd_util.get_subject_prefix(model_config, execution_mode, subject_id)
            # If not already present, download the data for this subject
            download_run_outputs_by_prefix(blobs_prefix=subject_prefix, destination=output_dir, run=first_child_run)

            # The segmentation for this subject and structure should now exist at this path
            segmentation_path = output_dir / subject_prefix / f"{structure_name}.nii.gz"
            predictions.append(Segmentation(structure_name=structure_name, subject_id=subject_id,
                                            segmentation_path=segmentation_path, dice_score=float(dice_score)))

    elif run_type == SurfaceDistanceRunType.IOV:
        subject_id = 0
        iov_dir = Path("outputs") / SurfaceDistanceRunType.IOV.value.lower()
        all_structs = model_config.class_and_index_with_background()
        structs_to_plot = [struct_name for struct_name in all_structs.keys()
                           if struct_name not in ['background', 'external']]
        for annotator in extended_annotators:
            for struct_name in structs_to_plot:
                segmentation_path = iov_dir / f"{struct_name + annotator}.nii.gz"
                if not segmentation_path.is_file():
                    logging.warning(f"No such file {segmentation_path}")
                    continue
                predictions.append(Segmentation(structure_name=struct_name, subject_id=subject_id,
                                                segmentation_path=segmentation_path, annotator=annotator))
    return predictions
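

# Hypothetical usage sketch, not part of the original module: one way load_predictions might be invoked for the
# IOV run type. The annotator names and the pre-built azure_config / model_config arguments are assumptions made
# purely for illustration.
def _example_load_iov_predictions(azure_config: AzureConfig,
                                  model_config: SegmentationModelBase) -> List[Segmentation]:
    # "model" stands in for the model name that extended_annotators is expected to include alongside annotators
    return load_predictions(run_type=SurfaceDistanceRunType.IOV,
                            azure_config=azure_config,
                            model_config=model_config,
                            execution_mode=ModelExecutionMode.TEST,
                            extended_annotators=["annotator1", "annotator2", "model"],
                            outlier_range=3.0)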