def test_store_inference_results(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Stores a randomly generated two-class inference result and checks the
    Nifti files that `store_inference_results` writes for it.

    The random seed is fixed, so the posteriors (and hence the hard-coded
    expected voxel values below) are reproducible between runs.

    :param test_output_dirs: Provides the root folder the results are
        written to.
    """
    np.random.seed(0)
    num_classes = 2
    random_volume = np.random.random_sample(
        (num_classes, dim_z, dim_y, dim_x))
    # Softmax over the class axis, so the posteriors sum to 1 per voxel.
    posterior = torch.nn.functional.softmax(
        torch.from_numpy(random_volume), dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == (dim_z, dim_y, dim_x)

    # Expected file contents for the two posterior channels.
    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))

    header = ImageHeader(
        origin=(0, 0, 0),
        direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
        spacing=(2.0, 2.0, 2.0))
    inference_result = InferencePipeline.Result(
        epoch=1,
        patient_id=12,
        posteriors=posterior,
        segmentation=segmentation,
        voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)
    expected_class_indices = {"background": 0, "region": 1}
    assert test_config.class_and_index_with_background() \
        == expected_class_indices

    results_folder = test_output_dirs.root_dir
    store_inference_results(
        inference_result, test_config, Path(results_folder), header)

    # The test expects all files in a "012" subfolder of the results folder.
    # (file name, expected unique voxel values) for every file that has the
    # same shape as the segmentation.
    segmentation_shaped_files = [
        ("posterior_background.nii.gz", list(posterior0)),
        ("posterior_region.nii.gz", list(posterior1)),
        ("background.nii.gz", [0, 1]),
        ("region.nii.gz", [0, 1]),
        (DEFAULT_RESULT_IMAGE_NAME, list(np.unique(segmentation))),
    ]
    for file_name, expected_values in segmentation_shaped_files:
        assert_nifti_content(
            os.path.join(results_folder, "012", file_name),
            segmentation.shape, header, expected_values, np.ubyte)

    # The uncertainty map is checked last, against its own shape.
    # NOTE(review): the expected values presumably follow from seed 0.
    assert_nifti_content(
        os.path.join(results_folder, "012", "uncertainty.nii.gz"),
        inference_result.uncertainty.shape, header,
        [248, 249, 253, 254], np.ubyte)
def test_evaluate_model_predictions() -> None:
    """
    Creates an 'InferencePipeline.Result' object using pre-defined volumes,
    stores results and evaluates metrics.

    Patients 3, 4 and 5 form the test set:
      * Patient 3 has one missing ground truth channel: "region"
      * Patient 4 has all ground truth channels missing: "region", "region_1"
      * Patient 5 has no missing ground truth channels.
    """
    expected_metric_names = (
        "Dice",
        "HausdorffDistance_millimeters",
        "MeanSurfaceDistance_millimeters",
    )
    writer_columns = ("Dice", "HausdorffDistance_mm", "MeanDistance_mm")
    all_ground_truth = ["region", "region_1"]

    def dataset_rows(patient_id: str, file_id: str,
                     ground_truth: List[str]) -> List[List[str]]:
        # Rows for one patient: two image channels (both read from the
        # "channel1" file), the mask, and one row per available ground
        # truth channel (all read from the "region" file).
        prefix = f"train_and_test_data/id{file_id}"
        rows = [
            [patient_id, f"{prefix}_channel1.nii.gz", "channel1"],
            [patient_id, f"{prefix}_channel1.nii.gz", "channel2"],
            [patient_id, f"{prefix}_mask.nii.gz", "mask"],
        ]
        for channel in ground_truth:
            rows.append([patient_id, f"{prefix}_region.nii.gz", channel])
        return rows

    def assert_metrics_present(metrics: MetricsDict, hue_name: str) -> None:
        # All expected metric names must be keys of the given hue.
        available = metrics.values(hue_name).keys()
        for metric_name in expected_metric_names:
            assert metric_name in available

    def assert_all_nan(metrics: MetricsDict, hue_names: List[str]) -> None:
        # Every metric stored for each of the given hues must be NaN only.
        for hue_name in hue_names:
            hue_values = metrics.values(hue_name)
            for metric_type in hue_values.keys():
                assert np.isnan(hue_values[metric_type]).all()

    input_list = (
        dataset_rows("1", "1", all_ground_truth)
        + dataset_rows("2", "2", all_ground_truth)
        # "region" is left out on purpose for patient 3.
        + dataset_rows("3", "2", ["region_1"])
        # Both ground truth channels are left out on purpose for patient 4.
        + dataset_rows("4", "2", [])
        + dataset_rows("5", "2", all_ground_truth)
    )

    config = create_config_from_dataset(
        input_list, train=['1'], val=['2'], test=['3', '4', '5'])
    config.allow_incomplete_labels = True
    ds = config.get_torch_dataset_for_inference(ModelExecutionMode.TEST)
    results_folder = config.outputs_folder
    results_folder.mkdir(exist_ok=True)

    model_prediction_evaluations: List[Tuple[PatientMetadata,
                                             MetricsDict]] = []
    for dataset_index, sample_dict in enumerate(ds):
        sample = Sample.from_dict(sample=sample_dict)

        # Constant posteriors: class 1 wins the argmax in every voxel.
        posteriors = np.zeros((3, ) + sample.mask.shape, 'float32')
        for class_index, probability in enumerate((0.2, 0.6, 0.2)):
            posteriors[class_index][:] = probability

        assert config.dataset_expected_spacing_xyz is not None
        inference_result = InferencePipeline.Result(
            patient_id=sample.patient_id,
            posteriors=posteriors,
            segmentation=np.argmax(posteriors, 0),
            voxel_spacing_mm=config.dataset_expected_spacing_xyz)
        store_inference_results(
            inference_result=inference_result,
            config=config,
            results_folder=results_folder,
            image_header=sample.metadata.image_header)

        metadata, metrics_per_class = evaluate_model_predictions(
            dataset_index,
            config=config,
            dataset=ds,
            results_folder=results_folder)
        model_prediction_evaluations.append((metadata, metrics_per_class))

        patient_id = sample.metadata.patient_id
        if patient_id == '3':
            # Patient 3 has one missing ground truth channel: "region"
            assert_metrics_present(metrics_per_class, 'region_1')
            assert_all_nan(metrics_per_class, ['region', 'Default'])
        elif patient_id == '4':
            # Patient 4 has all missing ground truth channels:
            # "region", "region_1"
            assert_all_nan(metrics_per_class,
                           ['region_1', 'region', 'Default'])
        elif patient_id == '5':
            # Patient 5 has no missing ground truth channels
            assert_all_nan(metrics_per_class, ['Default'])
            for hue_name in ['region_1', 'region']:
                assert_metrics_present(metrics_per_class, hue_name)

    metrics_writer, average_dice = populate_metrics_writer(
        model_prediction_evaluations, config)

    def written_value(column: str, row: int) -> float:
        # Value stored by the metrics writer, converted to float.
        return float(metrics_writer.columns[column][row])

    # NOTE(review): the row indices below presumably correspond to two rows
    # per test patient (one per ground truth channel) - confirm against
    # populate_metrics_writer.

    # Patient 3 has only one missing ground truth channel
    assert not np.isnan(average_dice[0])
    for column in writer_columns:
        assert np.isnan(written_value(column, 0))
        assert not np.isnan(written_value(column, 1))

    # Patient 4 has all missing ground truth channels
    assert np.isnan(average_dice[1])
    for column in writer_columns:
        for row in (2, 3):
            assert np.isnan(written_value(column, row))

    # Patient 5 has no missing ground truth channels.
    assert average_dice[2] > 0
    for column in writer_columns:
        for row in (4, 5):
            assert written_value(column, row) >= 0