def create_inference_pipeline(model_config: SegmentationModelBase,
                              full_path_to_checkpoints: List[Path],
                              use_gpu: bool = True) -> Tuple[FullImageInferencePipelineBase, SegmentationModelBase]:
    """
    Build the pipeline used for segmentation inference. Exactly one checkpoint gives a single-model
    pipeline; any other number of checkpoints is handed to the ensemble factory.

    :param model_config: Model config to use to create the pipeline. Its `use_gpu` field is overwritten
        with the value of the `use_gpu` argument.
    :param full_path_to_checkpoints: Checkpoints to use for model inference.
    :param use_gpu: If GPU should be used or not.
    :return: A tuple of the created pipeline and the (modified) model config.
    :raises ValueError: If the chosen factory did not return a pipeline.
    """
    model_config.use_gpu = use_gpu
    logging.info('test_config: ' + model_config.model_name)

    pipeline: Optional[FullImageInferencePipelineBase]
    if len(full_path_to_checkpoints) != 1:
        # Anything other than exactly one checkpoint is treated as an ensemble.
        pipeline = EnsemblePipeline.create_from_checkpoints(
            path_to_checkpoints=full_path_to_checkpoints,
            model_config=model_config)
    else:
        pipeline = InferencePipeline.create_from_checkpoint(
            path_to_checkpoint=full_path_to_checkpoints[0],
            model_config=model_config)

    if pipeline is not None:
        return pipeline, model_config
    raise ValueError("Cannot create inference pipeline")
def create_pipeline_from_checkpoint_paths(config: ModelConfigBase,
                                          checkpoint_paths: List[Path]) -> Optional[InferencePipelineBase]:
    """
    Attempt to create a pipeline from the provided checkpoint paths. More than one path gives an
    ensemble pipeline, exactly one path gives a single-model pipeline.

    :param config: The model configuration. Its `is_segmentation_model` / `is_scalar_model` flags
        select which family of pipelines is created.
    :param checkpoint_paths: The checkpoint files to load.
    :return: None if there are no paths. Otherwise whatever the selected pipeline factory returns;
        NOTE(review): the factories themselves may also return None (presumably when a checkpoint
        file does not exist) - confirm against their implementation.
    :raises NotImplementedError: If the config is neither a segmentation nor a scalar model.
    """
    if not checkpoint_paths:
        return None

    is_ensemble = len(checkpoint_paths) > 1

    if config.is_segmentation_model:
        assert isinstance(config, SegmentationModelBase)
        if is_ensemble:
            return EnsemblePipeline.create_from_checkpoints(path_to_checkpoints=checkpoint_paths,
                                                            model_config=config)
        return InferencePipeline.create_from_checkpoint(path_to_checkpoint=checkpoint_paths[0],
                                                        model_config=config)

    if config.is_scalar_model:
        assert isinstance(config, ScalarModelBase)
        if is_ensemble:
            return ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint=checkpoint_paths,
                                                                 config=config)
        return ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint=checkpoint_paths[0],
                                                              config=config)

    # The message names the kind of pipeline that was actually requested (the two messages used to be swapped).
    pipeline_kind = "ensemble" if is_ensemble else "inference"
    raise NotImplementedError(f"Cannot create {pipeline_kind} pipeline for unknown model type")
def create_inference_pipeline(config: ModelConfigBase,
                              checkpoint_paths: List[Path]) -> Optional[InferencePipelineBase]:
    """
    Create an inference pipeline from the given checkpoints: an ensemble pipeline if more than one
    checkpoint is given, otherwise a single-model pipeline.

    :param config: Model related configs. Its `is_segmentation_model` / `is_scalar_model` flags select
        which family of pipelines is created.
    :param checkpoint_paths: The checkpoint files to create the pipeline from.
    :return: None if `checkpoint_paths` is empty, otherwise the result of the selected pipeline factory
        (a segmentation or a scalar pipeline, single-model or ensemble).
    :raises NotImplementedError: If the config is neither a segmentation nor a scalar model.
    """
    if not checkpoint_paths:
        return None

    if len(checkpoint_paths) > 1:
        # Several checkpoints: build an ensemble.
        if config.is_segmentation_model:
            assert isinstance(config, SegmentationModelBase)
            return EnsemblePipeline.create_from_checkpoints(path_to_checkpoints=checkpoint_paths,
                                                            model_config=config)
        if config.is_scalar_model:
            assert isinstance(config, ScalarModelBase)
            return ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint=checkpoint_paths,
                                                                 config=config)
        raise NotImplementedError("Cannot create ensemble pipeline for unknown model type")

    # Exactly one checkpoint: build a single-model pipeline.
    only_checkpoint = checkpoint_paths[0]
    if config.is_segmentation_model:
        assert isinstance(config, SegmentationModelBase)
        return InferencePipeline.create_from_checkpoint(path_to_checkpoint=only_checkpoint,
                                                        model_config=config)
    if config.is_scalar_model:
        assert isinstance(config, ScalarModelBase)
        return ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint=only_checkpoint,
                                                              config=config)
    raise NotImplementedError("Cannot create inference pipeline for unknown model type")
def test_aggregate_results() -> None:
    """
    Check that averaging the results of several models yields the expected ensemble result:
    epoch and patient ID are taken over, posteriors are the element-wise mean, and the
    segmentation is derived from the averaged posteriors.
    """
    torch.manual_seed(1)
    num_models = 3

    # One random (softmax-normalized) result per ensemble member.
    per_model_results = []
    for _ in range(num_models):
        random_posteriors = torch.nn.functional.softmax(torch.rand(3, 3, 3, 3), dim=0).numpy()
        single_result = InferencePipeline.Result(
            epoch=0,
            patient_id=0,
            posteriors=random_posteriors,
            segmentation=posteriors_to_segmentation(random_posteriors),
            voxel_spacing_mm=(1, 1, 1))
        per_model_results.append(single_result)

    # The expectation has to be computed before aggregating, because (per the original
    # author's note) aggregation modifies the list of model results.
    mean_posteriors = np.mean([result.posteriors for result in per_model_results], axis=0)

    aggregated = EnsemblePipeline.aggregate_results(per_model_results,
                                                    aggregation_type=EnsembleAggregationType.Average)

    first_result = per_model_results[0]
    assert aggregated.epoch == first_result.epoch
    assert aggregated.patient_id == first_result.patient_id
    assert np.array_equal(aggregated.posteriors, mean_posteriors)
    assert np.array_equal(aggregated.segmentation, posteriors_to_segmentation(mean_posteriors))
def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> None:
    """
    Store a random two-class inference result for patient 12 and verify shape, header, dtype and
    unique values of every Nifti file that is written into the patient's "012" sub-folder.
    """
    np.random.seed(0)
    num_classes = 2
    random_volume = np.random.random_sample((num_classes, dim_z, dim_y, dim_x))
    posterior = torch.nn.functional.softmax(torch.from_numpy(random_volume), dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == (dim_z, dim_y, dim_x)
    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))

    spacing = (2.0, 2.0, 2.0)
    header = ImageHeader(origin=(0, 0, 0),
                         direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
                         spacing=spacing)
    inference_result = InferencePipeline.Result(epoch=1,
                                                patient_id=12,
                                                posteriors=posterior,
                                                segmentation=segmentation,
                                                voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)
    assert test_config.class_and_index_with_background() == {"background": 0, "region": 1}

    results_folder = test_output_dirs.root_dir
    store_inference_results(inference_result, test_config, Path(results_folder), header)

    def check_stored_file(file_name: str, shape: tuple, expected_values: list) -> None:
        # Every stored file is expected to carry the same header and to be of type ubyte.
        assert_nifti_content(os.path.join(results_folder, "012", file_name),
                             shape, header, expected_values, np.ubyte)

    volume_shape = segmentation.shape
    # Posteriors, one file per class.
    check_stored_file("posterior_background.nii.gz", volume_shape, list(posterior0))
    check_stored_file("posterior_region.nii.gz", volume_shape, list(posterior1))
    # Binary masks, one file per class.
    check_stored_file("background.nii.gz", volume_shape, [0, 1])
    check_stored_file("region.nii.gz", volume_shape, [0, 1])
    # Multi-label segmentation and uncertainty map.
    check_stored_file(DEFAULT_RESULT_IMAGE_NAME, volume_shape, list(np.unique(segmentation)))
    check_stored_file("uncertainty.nii.gz", inference_result.uncertainty.shape, [248, 249, 253, 254])
def create_from_checkpoints(path_to_checkpoints: List[Path],
                            model_config: SegmentationModelBase) -> EnsemblePipeline:
    """
    Build an ensemble out of one single-model pipeline per checkpoint. Checkpoints for which no
    pipeline could be created are skipped with a warning.

    :param path_to_checkpoints: The checkpoint files, one per ensemble member.
    :param model_config: The model configuration shared by all ensemble members.
    :return: An ensemble pipeline over all pipelines that could be created.
    :raises ValueError: If not a single pipeline could be created (including an empty path list).
    """
    usable_pipelines = []
    for position, path in enumerate(path_to_checkpoints):
        # The position in the list is passed as the third argument of the single-model factory.
        candidate = InferencePipeline.create_from_checkpoint(path, model_config, position)
        if candidate is not None:
            usable_pipelines.append(candidate)
            continue
        logging.warning(f"Cannot create pipeline from path {path}; dropping it from ensemble")

    if len(usable_pipelines) == 0:
        raise ValueError("Could not create ANY pipelines from checkpoint paths")
    return EnsemblePipeline(model_config=model_config, inference_pipelines=usable_pipelines)
def run_inference_on_unet(size: TupleInt3) -> None:
    """
    Run a forward pass of a freshly created UNet3D on a random single-channel image of the given
    size, and assert that every posterior and the segmentation have exactly that size and pass
    the array range check.

    :param size: The spatial size of the input image.
    """
    foreground = ["tumour_mass", "subtract"]
    num_foreground = len(foreground)
    num_classes_with_background = num_foreground + 1
    unet_config = SegmentationModelBase(
        architecture="UNet3D",
        local_dataset=Path("dummy"),
        feature_channels=[1],
        kernel_size=3,
        largest_connected_component_foreground_classes=foreground,
        posterior_smoothing_mm=(2, 2, 2),
        crop_size=(64, 64, 64),
        # test_crop_size must be larger than 'size' for the bug to trigger
        test_crop_size=(80, 80, 80),
        image_channels=["mr"],
        ground_truth_ids=foreground,
        ground_truth_ids_display_names=foreground,
        colours=[(255, 0, 0)] * num_foreground,
        fill_holes=[False] * num_foreground,
        mask_id=None,
        class_weights=[1.0 / num_classes_with_background] * num_classes_with_background,
        train_batch_size=8,
        inference_batch_size=1,
        inference_stride_size=(40, 40, 40),
        use_mixed_precision=True)

    model = create_lightning_model(unet_config)
    assert isinstance(model, SegmentationLightning)
    unet_pipeline = InferencePipeline(model=model, model_config=unet_config)

    # A single-channel random image, with a mask that covers the whole image.
    input_image = np.random.uniform(-1, 1, (1, ) + size)
    prediction = unet_pipeline.predict_and_post_process_whole_image(input_image,
                                                                    mask=np.ones(size),
                                                                    voxel_spacing_mm=(1, 1, 1))

    # All posteriors and segmentations must have the size of the input image
    outputs = list(prediction.posteriors) + [prediction.segmentation]
    for output in outputs:
        assert output.shape == size
        # Check that all results are not NaN. In particular, if stride size is not adjusted
        # correctly, the results would be partially NaN.
        image_util.check_array_range(output)
def test_check_inference_result(segmentation: Any, posteriors: Any, voxel_spacing_mm: Any) -> None:
    """
    Verify that constructing an 'InferencePipeline.Result' from the given (invalid) combination of
    arguments is rejected with an exception.

    :param segmentation: The segmentation to pass to the result constructor.
    :param posteriors: The posteriors to pass to the result constructor.
    :param voxel_spacing_mm: The voxel spacing to pass to the result constructor.
    """
    constructor_arguments = dict(epoch=0,
                                 patient_id=0,
                                 segmentation=segmentation,
                                 posteriors=posteriors,
                                 voxel_spacing_mm=voxel_spacing_mm)
    with pytest.raises(Exception):
        InferencePipeline.Result(**constructor_arguments)
def inference_identity(test_output_dirs: OutputFolderForTests,
                       image_size: Any = (4, 5, 8),
                       crop_size: Any = (5, 5, 5),
                       shrink_by: Any = (0, 0, 0),
                       num_classes: int = 5,
                       create_mask: bool = True,
                       extract_largest_foreground_connected_component: bool = False,
                       is_ensemble: bool = False,
                       posterior_smoothing_mm: Any = None) -> None:
    """
    Test to make sure inference pipeline is identity preserving, ie: we can recreate deterministic
    model output, ensuring the patching and stitching is robust.

    :param test_output_dirs: Test output folders; the model checkpoint is written into its root folder.
    :param image_size: Spatial size of the random input image.
    :param crop_size: Used as both the training and the test crop size of the model config.
    :param shrink_by: Passed through to the constructor of the identity model config.
    :param num_classes: Number of foreground classes. The image gets one more channel than that,
        to account for background.
    :param create_mask: If True, a random binary mask is created and applied. If False, no mask is used.
    :param extract_largest_foreground_connected_component: If True, the largest connected component
        is extracted for every foreground class, both in the config and in the expected segmentation.
    :param is_ensemble: If True, the single pipeline is wrapped into an ensemble pipeline with one member.
    :param posterior_smoothing_mm: If not None, the kernel size for Gaussian smoothing of the posteriors.
    """
    # fix random seed
    np.random.seed(0)
    # Normalize to a tuple so that the shape comparisons below also work for list-valued sizes.
    expected_shape = tuple(image_size)

    ground_truth_ids = list(map(str, range(num_classes)))
    # image to run inference on: The mock model passes the input image through, hence the input
    # image must have as many channels as we have classes (plus background), such that the output is
    # also a valid posterior.
    num_channels = num_classes + 1
    image_channels = np.random.randn(num_channels, *list(image_size))
    # create a random mask if required. The builtin `int` replaces the `np.int` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.
    mask = np.round(np.random.uniform(size=image_size)).astype(int) if create_mask else None

    config = InferenceIdentityModel(shrink_by=shrink_by)
    config.crop_size = crop_size
    config.test_crop_size = crop_size
    config.image_channels = list(map(str, range(num_channels)))
    config.ground_truth_ids = ground_truth_ids
    config.posterior_smoothing_mm = posterior_smoothing_mm
    # We have to set largest_connected_component_foreground_classes after creating the model config,
    # because this parameter is not overridable and hence will not be set by GenericConfig's constructor.
    if extract_largest_foreground_connected_component:
        config.largest_connected_component_foreground_classes = [(c, None) for c in ground_truth_ids]

    # set expected posteriors
    expected_posteriors = torch.nn.functional.softmax(torch.tensor(image_channels), dim=0).numpy()
    # apply the mask if required
    if mask is not None:
        expected_posteriors = image_util.apply_mask_to_posteriors(expected_posteriors, mask)
    if posterior_smoothing_mm is not None:
        expected_posteriors = image_util.gaussian_smooth_posteriors(
            posteriors=expected_posteriors,
            kernel_size_mm=posterior_smoothing_mm,
            voxel_spacing_mm=(1, 1, 1))

    # compute expected segmentation
    expected_segmentation = image_util.posteriors_to_segmentation(expected_posteriors)
    if extract_largest_foreground_connected_component:
        largest_component = image_util.extract_largest_foreground_connected_component(
            multi_label_array=expected_segmentation)
        # make sure the test data is accurate by checking if more than one component exists
        assert not np.array_equal(largest_component, expected_segmentation)
        expected_segmentation = largest_component

    # instantiate the model
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path=checkpoint)

    # create single or ensemble inference pipeline
    inference_pipeline = InferencePipeline.create_from_checkpoint(path_to_checkpoint=checkpoint,
                                                                  model_config=config)
    assert inference_pipeline is not None
    full_image_inference_pipeline = EnsemblePipeline([inference_pipeline], config) \
        if is_ensemble else inference_pipeline

    # compute full image inference results
    inference_result = full_image_inference_pipeline.predict_and_post_process_whole_image(
        image_channels=image_channels,
        mask=mask,
        voxel_spacing_mm=(1, 1, 1))

    # Segmentation must have same size as input image
    assert inference_result.segmentation.shape == expected_shape
    assert inference_result.posteriors.shape == (num_classes + 1, ) + expected_shape
    # check that the posteriors and segmentations are as expected
    assert np.allclose(inference_result.posteriors, expected_posteriors)
    assert np.array_equal(inference_result.segmentation, expected_segmentation)
def test_evaluate_model_predictions() -> None:
    """
    Creates an 'InferencePipeline.Result' object using pre-defined volumes, stores results and evaluates metrics.

    For every sample of the test dataset a constant posterior is stored as the model prediction, and metrics
    are computed against the ground truth via `evaluate_model_predictions`. The test then checks that metrics
    for missing ground truth channels are NaN, both in the per-sample metrics and in the output of
    `populate_metrics_writer`.
    """
    # Patients 3, 4, and 5 are in test dataset such that:
    # Patient 3 has one missing ground truth channel: "region"
    # Patient 4 has all missing ground truth channels: "region", "region_1"
    # Patient 5 has no missing ground truth channels.
    # Each entry is passed on to create_config_from_dataset; the three fields look like
    # [patient ID, file path, channel name] - NOTE(review): confirm against that helper.
    input_list = [
        ["1", "train_and_test_data/id1_channel1.nii.gz", "channel1"],
        ["1", "train_and_test_data/id1_channel1.nii.gz", "channel2"],
        ["1", "train_and_test_data/id1_mask.nii.gz", "mask"],
        ["1", "train_and_test_data/id1_region.nii.gz", "region"],
        ["1", "train_and_test_data/id1_region.nii.gz", "region_1"],
        ["2", "train_and_test_data/id2_channel1.nii.gz", "channel1"],
        ["2", "train_and_test_data/id2_channel1.nii.gz", "channel2"],
        ["2", "train_and_test_data/id2_mask.nii.gz", "mask"],
        ["2", "train_and_test_data/id2_region.nii.gz", "region"],
        ["2", "train_and_test_data/id2_region.nii.gz", "region_1"],
        ["3", "train_and_test_data/id2_channel1.nii.gz", "channel1"],
        ["3", "train_and_test_data/id2_channel1.nii.gz", "channel2"],
        ["3", "train_and_test_data/id2_mask.nii.gz", "mask"],
        # ["3", "train_and_test_data/id2_region.nii.gz", "region"], # commented on purpose
        ["3", "train_and_test_data/id2_region.nii.gz", "region_1"],
        ["4", "train_and_test_data/id2_channel1.nii.gz", "channel1"],
        ["4", "train_and_test_data/id2_channel1.nii.gz", "channel2"],
        ["4", "train_and_test_data/id2_mask.nii.gz", "mask"],
        # ["4", "train_and_test_data/id2_region.nii.gz", "region"], # commented on purpose
        # ["4", "train_and_test_data/id2_region.nii.gz", "region_1"], # commented on purpose
        ["5", "train_and_test_data/id2_channel1.nii.gz", "channel1"],
        ["5", "train_and_test_data/id2_channel1.nii.gz", "channel2"],
        ["5", "train_and_test_data/id2_mask.nii.gz", "mask"],
        ["5", "train_and_test_data/id2_region.nii.gz", "region"],
        ["5", "train_and_test_data/id2_region.nii.gz", "region_1"]
    ]

    config = create_config_from_dataset(input_list, train=['1'], val=['2'], test=['3', '4', '5'])
    # Without this flag, samples with missing ground truth channels are presumably rejected - TODO confirm.
    config.allow_incomplete_labels = True
    ds = config.get_torch_dataset_for_inference(ModelExecutionMode.TEST)
    results_folder = config.outputs_folder
    if not results_folder.is_dir():
        results_folder.mkdir()

    # Collects one (metadata, metrics) pair per test sample, in dataset order.
    model_prediction_evaluations: List[Tuple[PatientMetadata, MetricsDict]] = []

    # The enumeration starts at 1, hence the zero-based dataset index used below is sample_index - 1.
    for sample_index, sample in enumerate(ds, 1):
        sample = Sample.from_dict(sample=sample)
        # Constant posteriors for 3 classes: the middle class has the highest value (0.6) in every voxel,
        # so the argmax segmentation below is 1 everywhere.
        posteriors = np.zeros((3, ) + sample.mask.shape, 'float32')
        posteriors[0][:] = 0.2
        posteriors[1][:] = 0.6
        posteriors[2][:] = 0.2

        assert config.dataset_expected_spacing_xyz is not None
        inference_result = InferencePipeline.Result(
            patient_id=sample.patient_id,
            posteriors=posteriors,
            segmentation=np.argmax(posteriors, 0),
            voxel_spacing_mm=config.dataset_expected_spacing_xyz)
        store_inference_results(inference_result=inference_result,
                                config=config,
                                results_folder=results_folder,
                                image_header=sample.metadata.image_header)

        # Evaluate the prediction that was just stored for this sample against the dataset's ground truth.
        metadata, metrics_per_class = evaluate_model_predictions(
            sample_index - 1,
            config=config,
            dataset=ds,
            results_folder=results_folder)

        model_prediction_evaluations.append((metadata, metrics_per_class))

        # Patient 3 has one missing ground truth channel: "region"
        if sample.metadata.patient_id == '3':
            # The channel that is present must have all three metrics.
            assert 'Dice' in metrics_per_class.values('region_1').keys()
            assert 'HausdorffDistance_millimeters' in metrics_per_class.values('region_1').keys()
            assert 'MeanSurfaceDistance_millimeters' in metrics_per_class.values('region_1').keys()
            # The missing channel and the 'Default' hue must only contain NaN.
            for hue_name in ['region', 'Default']:
                for metric_type in metrics_per_class.values(hue_name).keys():
                    assert np.isnan(metrics_per_class.values(hue_name)[metric_type]).all()

        # Patient 4 has all missing ground truth channels: "region", "region_1"
        if sample.metadata.patient_id == '4':
            for hue_name in ['region_1', 'region', 'Default']:
                for metric_type in metrics_per_class.values(hue_name).keys():
                    assert np.isnan(metrics_per_class.values(hue_name)[metric_type]).all()

        # Patient 5 has no missing ground truth channels
        if sample.metadata.patient_id == '5':
            # Even with complete ground truth, the 'Default' hue is expected to hold only NaN.
            for metric_type in metrics_per_class.values('Default').keys():
                assert np.isnan(metrics_per_class.values('Default')[metric_type]).all()
            for hue_name in ['region_1', 'region']:
                assert 'Dice' in metrics_per_class.values(hue_name).keys()
                assert 'HausdorffDistance_millimeters' in metrics_per_class.values(hue_name).keys()
                assert 'MeanSurfaceDistance_millimeters' in metrics_per_class.values(hue_name).keys()

    metrics_writer, average_dice = populate_metrics_writer(model_prediction_evaluations, config)
    # The asserts below index average_dice per test patient (0, 1, 2 for patients 3, 4, 5) and the writer
    # columns with two consecutive rows per patient - presumably one row per ground truth structure,
    # with "region" before "region_1"; verify against populate_metrics_writer.
    # Patient 3 has only one missing ground truth channel
    assert not np.isnan(average_dice[0])
    assert np.isnan(float(metrics_writer.columns["Dice"][0]))
    assert not np.isnan(float(metrics_writer.columns["Dice"][1]))
    assert np.isnan(float(metrics_writer.columns["HausdorffDistance_mm"][0]))
    assert not np.isnan(float(metrics_writer.columns["HausdorffDistance_mm"][1]))
    assert np.isnan(float(metrics_writer.columns["MeanDistance_mm"][0]))
    assert not np.isnan(float(metrics_writer.columns["MeanDistance_mm"][1]))
    # Patient 4 has all missing ground truth channels
    assert np.isnan(average_dice[1])
    assert np.isnan(float(metrics_writer.columns["Dice"][2]))
    assert np.isnan(float(metrics_writer.columns["Dice"][3]))
    assert np.isnan(float(metrics_writer.columns["HausdorffDistance_mm"][2]))
    assert np.isnan(float(metrics_writer.columns["HausdorffDistance_mm"][3]))
    assert np.isnan(float(metrics_writer.columns["MeanDistance_mm"][2]))
    assert np.isnan(float(metrics_writer.columns["MeanDistance_mm"][3]))
    # Patient 5 has no missing ground truth channels.
    assert average_dice[2] > 0
    assert float(metrics_writer.columns["Dice"][4]) >= 0
    assert float(metrics_writer.columns["Dice"][5]) >= 0
    assert float(metrics_writer.columns["HausdorffDistance_mm"][4]) >= 0
    assert float(metrics_writer.columns["HausdorffDistance_mm"][5]) >= 0
    assert float(metrics_writer.columns["MeanDistance_mm"][4]) >= 0
    assert float(metrics_writer.columns["MeanDistance_mm"][5]) >= 0