def create_hdf5_from_nifti(input_nifti_volume: Path, input_nifti_seg: Path,
                           input_nifti_mask: Path, output_h5: Path) -> None:
    """
    Packs a NIfTI volume, segmentation and mask into a single gzip-compressed HDF5 file.

    The file receives four datasets: 'volume' (the image, replicated to fill 2 channels), 'region' and
    'region_1' (both holding the same segmentation with a leading channel axis), and 'mask'.

    :param input_nifti_volume: Path to the NIfTI file holding the image volume.
    :param input_nifti_seg: Path to the NIfTI file holding the segmentation.
    :param input_nifti_mask: Path to the NIfTI file holding the mask.
    :param output_h5: Path of the HDF5 file to write; it is opened in 'w' mode.
    """
    def _load_with_channel_axis(nifti_path: Path) -> np.ndarray:
        # Prepend a channel axis of length 1 to the loaded array.
        return np.expand_dims(load_nifti_image(nifti_path).image, axis=0)

    single_channel_volume = _load_with_channel_axis(input_nifti_volume)
    # np.resize repeats the source data to fill the requested shape, so both channels carry the same volume.
    two_channel_volume = np.resize(single_channel_volume, (2, ) + single_channel_volume.shape[1:])
    seg = _load_with_channel_axis(input_nifti_seg)
    mask_data = _load_with_channel_axis(input_nifti_mask)

    # Dataset name -> array, in the order in which the datasets are written.
    datasets = (
        ('volume', two_channel_volume),
        ('region', seg),
        ('region_1', seg),
        ('mask', mask_data),
    )
    with h5py.File(str(output_h5), 'w') as hf:
        for dataset_name, array in datasets:
            hf.create_dataset(dataset_name, data=array, compression="gzip", compression_opts=9)
示例#2
0
def test_save_dataset_example(test_output_dirs: OutputFolderForTests) -> None:
    """
    Stores a synthetic dataset example via store_and_upload_example and checks that the image, label and
    prediction files read back from disk carry the expected spacing and voxel values.
    """
    image_size = (10, 20, 30)
    spacing = (1, 2, 3)
    np.random.seed(0)
    # Mimic a photonormalized image: uniform random values in [-1, 1), i.e. centered around 0.
    image = np.random.rand(*image_size) * 2 - 1

    # Labels are one-hot encoded with 2 classes: everything is class 0 apart from a single voxel of class 1.
    labels = np.zeros((2, ) + image_size, dtype=int)
    labels[0] = 1
    foreground_voxel = (slice(5, 6), slice(10, 11), slice(15, 16))
    labels[(0, ) + foreground_voxel] = 0
    labels[(1, ) + foreground_voxel] = 1

    # Predictions are class indices: a 3x3x3 cube of class 1 around the foreground voxel.
    prediction = np.zeros(image_size, dtype=int)
    prediction[4:7, 9:12, 14:17] = 1

    header = ImageHeader(origin=(0, 1, 0),
                         direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
                         spacing=spacing)
    dataset_sample = DatasetExample(epoch=1,
                                    patient_id=2,
                                    header=header,
                                    image=image,
                                    prediction=prediction,
                                    labels=labels)

    config = SegmentationModelBase(
        should_validate=False,
        norm_method=PhotometricNormalizationMethod.Unchanged)
    config.set_output_to(test_output_dirs.root_dir)
    store_and_upload_example(dataset_sample, config)

    def _load_example(file_name: str) -> Any:
        # Reads one of the stored example files from the config's example images folder.
        return io_util.load_nifti_image(os.path.join(config.example_images_folder, file_name))

    image_from_disk = _load_example("p2_e_1_image.nii.gz")
    labels_from_disk = _load_example("p2_e_1_label.nii.gz")
    prediction_from_disk = _load_example("p2_e_1_prediction.nii.gz")

    assert image_from_disk.header.spacing == spacing
    # With the Unchanged normalization, the stored image is expected to be the input multiplied by 1000 and
    # truncated to an integer type (modelled here as int16); it is compared as float64.
    expected_from_disk = (image * 1000).astype(np.int16).astype(np.float64)
    assert np.array_equal(image_from_disk.image, expected_from_disk)
    assert labels_from_disk.header.spacing == spacing
    # One-hot labels are expected on disk as class indices.
    assert np.array_equal(labels_from_disk.image, np.argmax(labels, axis=0))
    assert prediction_from_disk.header.spacing == spacing
    assert np.array_equal(prediction_from_disk.image, prediction)
示例#3
0
def test_run_scoring(test_output_dirs: OutputFolderForTests,
                     is_ensemble: bool) -> None:
    """
    Runs inference via the scoring code on a cropped test image and checks the result.
    This test lives outside the normal Tests folder because it imports "score.py" from the repository root folder.
    If we switched to InnerEye as a package, we would have to treat this import special.
    The inference run here is on a 1-channel model, whereas test_register_and_score_model works with a 2-channel
    model.

    :param test_output_dirs: Output folder fixture; the model checkpoint is written into its root directory.
    :param is_ensemble: If True, the same checkpoint is passed twice to build the inference pipeline.
    """
    seed_everything(42)
    image_size = (40, 40, 40)
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"

    dummy_config = DummyModel()
    # The test crop covers the whole (cropped) image.
    dummy_config.test_crop_size = image_size
    dummy_config.inference_stride_size = (10, 10, 10)
    dummy_config.inference_batch_size = 10
    create_model_and_store_checkpoint(dummy_config, checkpoint)

    checkpoint_paths = [checkpoint, checkpoint] if is_ensemble else [checkpoint]
    inference_pipeline, dummy_config = create_inference_pipeline(dummy_config,
                                                                 checkpoint_paths,
                                                                 use_gpu=False)

    image_with_header = io_util.load_nifti_image(test_image)
    # Keep only the leading image_size voxels along each of the three axes.
    crop = tuple(slice(None, extent) for extent in image_size)
    image_with_header.image = image_with_header.image[crop]

    result = run_inference([image_with_header, image_with_header],
                           inference_pipeline, dummy_config)
    assert image_with_header.image.shape == result.shape  # type: ignore
    print(f"Unique result values: {np.unique(result)}")
    assert np.all(result == 1)
def _test_load_images_from_channels(metadata: Any, image_channel: Any,
                                    ground_truth_channel: Any,
                                    mask_channel: Any) -> None:
    """
    Loads a sample from a dataset source that repeats the given channels, and checks the loaded arrays.
    The image channel is used twice and the ground truth channel four times. Nothing is asserted if
    image_channel is falsy.

    :param metadata: Patient metadata passed through to the dataset source.
    :param image_channel: The image channel file.
    :param ground_truth_channel: The ground truth channel file.
    :param mask_channel: The mask channel file.
    """
    source = PatientDatasetSource(metadata=metadata,
                                  image_channels=[image_channel] * 2,
                                  ground_truth_channels=[ground_truth_channel] * 4,
                                  mask_channel=mask_channel)
    sample = io_util.load_images_from_dataset_source(source)
    if not image_channel:
        return

    expected = io_util.load_nifti_image(image_channel).image
    expected_shape = list(expected.shape)
    # Two image channels were requested, each of them must equal the source image.
    assert list(sample.image.shape) == [2] + expected_shape
    assert all(np.array_equal(channel, expected) for channel in sample.image)  # type: ignore
    if mask_channel:
        assert np.array_equal(sample.mask, expected)
    if ground_truth_channel:
        # Four ground truth channels plus one extra leading channel that must be all zero.
        assert list(sample.labels.shape) == [5] + expected_shape
        assert np.all(sample.labels[0] == 0) and np.all(sample.labels[1:] == 1)
示例#5
0
 def _load_and_scale_image(name: str) -> ImageWithHeader:
     """
     Loads a NIfTI image and maps its values from the range (0, 255) to the range (0, 1).

     :param name: Name of the image file; resolved to a full path via full_ml_test_data_path.
     :return: The rescaled image, together with the header of the loaded file.
     """
     loaded = load_nifti_image(full_ml_test_data_path(name))
     rescaled = LinearTransform.transform(data=loaded.image,
                                          input_range=(0, 255),
                                          output_range=(0, 1))
     return ImageWithHeader(image=rescaled, header=loaded.header)
示例#6
0
def evaluate_model_predictions(
        process_id: int, config: SegmentationModelBase,
        dataset: FullImageDataset,
        results_folder: Path) -> Tuple[PatientMetadata, MetricsDict]:
    """
    Computes per-class metrics for the stored segmentation of one patient and plots its contours.
    The segmentation is read from the patient's results folder and compared against the labels of the dataset
    sample. Contour plots are written to the thumbnails folder below results_folder.
    The function is intended to be called from a parallel loop, with one call per image.
    :param process_id: Identifier for the process calling the function; used as the index of the dataset sample.
    :param config: Segmentation model config object; supplies the ground truth IDs and the image output range.
    :param dataset: Dataset object, used to load the intensity image, labels, and patient metadata.
    :param results_folder: Path to results folder
    :returns: Tuple of the patient metadata and the metrics computed per class for this image.
    """
    sample = dataset.get_samples_at_index(index=process_id)[0]
    patient_id = sample.patient_id
    logging.info(f"Evaluating predictions for patient {patient_id}")

    segmentation_file = get_patient_results_folder(results_folder, patient_id) / DEFAULT_RESULT_IMAGE_NAME
    segmentation = load_nifti_image(segmentation_file).image
    class_names = config.ground_truth_ids
    metrics_per_class = metrics.calculate_metrics_per_class(
        segmentation,
        sample.labels,
        ground_truth_ids=class_names,
        voxel_spacing=sample.image_spacing,
        patient_id=patient_id)

    thumbnails_folder = results_folder / THUMBNAILS_FOLDER
    # exist_ok: the folder may already have been created by an earlier call.
    thumbnails_folder.mkdir(exist_ok=True)
    plotting.plot_contours_for_all_classes(
        sample,
        segmentation=segmentation,
        foreground_class_names=class_names,
        result_folder=thumbnails_folder,
        image_range=config.output_range)
    return sample.metadata, metrics_per_class
def add_label_stats_to_dataframe(input_dataframe: pd.DataFrame,
                                 dataset_root_directory: Path,
                                 target_label_names: List[str]) -> pd.DataFrame:
    """
    Computes ground-truth label statistics for every subject and writes them into the input dataframe.
    For each subject, the overlap between structures and the label volumes are computed, and stored in the
    columns "LabelOverlap" and "LabelVolume (mL)" by mapping the subject's "channel" values to the statistics.

    :param input_dataframe: Input Pandas dataframe object containing subjectIds and label names. It is modified
        in place.
    :param dataset_root_directory: Path to dataset root directory
    :param target_label_names: A list of label names that are used in label stat computations
    :return: The input dataframe, with the statistics columns added.
    """
    dataset_sources = load_dataset_sources(input_dataframe,
                                           local_dataset_root_folder=dataset_root_directory,
                                           image_channels=["ct"],
                                           ground_truth_channels=target_label_names,
                                           mask_channel=None)

    for subject_id in dataset_sources.keys():
        source = dataset_sources[subject_id]
        # The first label channel is skipped in all statistics.
        foreground_labels = io_util.load_labels_from_dataset_source(source)[1:, ...]
        overlap_stats = metrics_util.get_label_overlap_stats(labels=foreground_labels,
                                                             label_names=target_label_names)

        # Voxel spacing is taken from the header of the first ground truth channel.
        spacing = io_util.load_nifti_image(source.ground_truth_channels[0]).header.spacing
        volume_stats = metrics_util.get_label_volume(labels=foreground_labels,
                                                     label_names=target_label_names,
                                                     label_spacing=spacing)

        # Write the statistics into the rows of this subject, looked up by channel name.
        subject_rows = input_dataframe.subject == subject_id
        stats_by_column = {"LabelOverlap": overlap_stats, "LabelVolume (mL)": volume_stats}
        for column_name, column_stats in stats_by_column.items():
            input_dataframe.loc[subject_rows, column_name] = \
                input_dataframe.loc[subject_rows, "channel"].map(column_stats)

    return input_dataframe
 def extract_spacing(self, patient_id: IntOrString) -> TupleFloat3:
     """
     Reads the spacing for a patient from the header of the first image channel.

     :param patient_id: Key into self.dataset_sources that identifies the patient.
     :return: The spacing stored in the header of that channel's NIfTI file.
     """
     first_channel = self.dataset_sources[patient_id].image_channels[0]
     header = io_util.load_nifti_image(first_channel).header
     return header.spacing
def test_run_scoring(is_ensemble: bool) -> None:
    """
    Runs inference on the test image, passed twice, and checks that the result is all ones and has the shape
    of the input image.

    :param is_ensemble: If True, the list of checkpoint paths is doubled when creating the inference pipeline.
    """
    if is_ensemble:
        checkpoints_paths = checkpoint_full_paths * 2
    else:
        checkpoints_paths = checkpoint_full_paths
    inference_pipeline, dummy_config = create_inference_pipeline(DummyModel(), checkpoints_paths, use_gpu=False)
    image_with_header = io_util.load_nifti_image(test_image)
    channels = [image_with_header, image_with_header]
    result = run_inference(channels, inference_pipeline, dummy_config)
    assert np.all(result == 1)
    assert image_with_header.image.shape == result.shape  # type: ignore
def test_score_check_spacing() -> None:
    """
    Checks is_spacing_valid against the spacing of a test image: the expected spacing and a slightly
    different one (3.01) must be accepted, a clearly different one (3.2) must be rejected.
    """
    config = LOADER.create_model_config_from_name("DummyModel")
    config.dataset_expected_spacing_xyz = (1.0, 1.0, 3.0)
    header = io_util.load_nifti_image(img_nii_path).header
    # The header spacing is reversed before comparing against the xyz expectation.
    spacing_xyz = reverse_tuple_float3(header.spacing)
    assert is_spacing_valid(spacing_xyz, config.dataset_expected_spacing_xyz)
    assert is_spacing_valid(spacing_xyz, (1, 1, 3.01))
    assert not is_spacing_valid(spacing_xyz, (1, 1, 3.2))
示例#11
0
def test_nii_load_zyx(test_output_dirs: OutputFolderForTests) -> None:
    """
    Checks that load_nifti_image returns the array with the same shape as SimpleITK's GetArrayFromImage,
    which is the reverse of the size reported by the SimpleITK image, and that a spacing is available.

    :param test_output_dirs: Output folder fixture (not used by the test body).
    """
    expected_shape = (44, 167, 167)
    file_path = full_ml_test_data_path("patch_sampling/scan_small.nii.gz")

    # Reference values, read directly with SimpleITK.
    sitk_image: sitk.Image = sitk.ReadImage(str(file_path))
    assert sitk_image.GetSize() == reverse_tuple_float3(expected_shape)
    sitk_array = sitk.GetArrayFromImage(sitk_image)
    assert sitk_array.shape == expected_shape

    # The same file, read via the project's loader.
    loaded = io_util.load_nifti_image(file_path)
    assert loaded.image.shape == expected_shape
    assert loaded.header.spacing is not None
    np.testing.assert_allclose(loaded.header.spacing, (3.0, 1.0, 1.0), rtol=0.1)
示例#12
0
def score_image(args: ScorePipelineConfig) -> Path:
    """
    Perform model inference on a single image. By doing the following:
    1) Resolve the input images from args.data_folder. If args.use_dicom is set, the single zipped DICOM series
       is first converted to a NIfTI file inside the model folder.
    2) Instantiate an inference pipeline based on the model_inference.json in the model folder
    3) Store the segmentation file in the model folder. If args.use_dicom is set, convert it to a zipped
       DICOM-RT file.
    4) Upload the result to AML, unless running in an offline run context
    :param args: Scoring configuration. If args.model_folder is not set, the folder containing this file is used
        as the model folder.
    :return: Path to the stored result (the NIfTI segmentation, or the zipped DICOM-RT file if args.use_dicom).
    :raises ValueError: If args.use_dicom is set and args.image_files does not contain exactly one file.
    """
    logging.getLogger().setLevel(logging.INFO)
    score_py_folder = Path(__file__).parent
    model_folder = Path(args.model_folder or str(score_py_folder))

    run_context = Run.get_context()
    logging.info(f"Run context={run_context.id}")

    if args.use_dicom:
        # Only a single zip file is supported. Checking for "not exactly one" also rejects an empty list, which
        # would otherwise fail with an IndexError on args.image_files[0] below.
        if len(args.image_files) != 1:
            raise ValueError("Supply exactly one zip file in args.images.")
        input_zip_file = check_input_file(args.data_folder,
                                          args.image_files[0])
        reference_series_folder = model_folder / "temp_extraction"
        nifti_filename = model_folder / "temp_nifti.nii.gz"
        convert_zipped_dicom_to_nifti(input_zip_file, reference_series_folder,
                                      nifti_filename)
        test_images = [nifti_filename]
    else:
        test_images = [
            check_input_file(args.data_folder, file)
            for file in args.image_files
        ]

    images = [load_nifti_image(file) for file in test_images]

    inference_pipeline, config = init_from_model_inference_json(
        model_folder, args.use_gpu)
    segmentation = run_inference(images, inference_pipeline, config)

    # The segmentation is stored with the header of the first input image.
    segmentation_file_name = model_folder / args.result_image_name
    result_dst = store_as_ubyte_nifti(segmentation, images[0].header,
                                      segmentation_file_name)

    if args.use_dicom:
        # The extracted reference series is needed again to build the DICOM-RT output.
        result_dst = convert_nifti_to_zipped_dicom_rt(
            result_dst, reference_series_folder, model_folder, config,
            args.result_zip_dicom_name, args.model_id)

    if not is_offline_run_context(run_context):
        upload_file_name = args.result_zip_dicom_name if args.use_dicom else args.result_image_name
        run_context.upload_file(upload_file_name, str(result_dst))
    logging.info(f"Segmentation completed: {result_dst}")
    return result_dst
def test_show_non_square_images(test_output_dirs: OutputFolderForTests) -> None:
    """
    Plots a scan with a single-voxel overlay along each of the 3 dimensions, and compares the saved PNG
    files against the stored reference images.

    :param test_output_dirs: Output folder fixture; the PNG files are written to its root directory.
    """
    data_folder = full_ml_test_data_path("patch_sampling")
    scan = load_nifti_image(data_folder / "scan_small.nii.gz")
    image = scan.image
    shape = image.shape
    # The overlay marks only the voxel at the center of the image.
    center = tuple(extent // 2 for extent in shape)
    mask = np.zeros_like(image)
    mask[center[0], center[1], center[2]] = 1
    for dim in range(3):
        scan_with_transparent_overlay(image, mask, dim, center[dim], spacing=scan.header.spacing)
        actual_file = Path(test_output_dirs.root_dir) / f"dim_{dim}.png"
        resize_and_save(5, 5, actual_file)
        expected = full_ml_test_data_path("patch_sampling") / f"overlay_with_aspect_dim{dim}.png"
        # To update the stored results, uncomment this line:
        # expected.write_bytes(actual_file.read_bytes())
        assert_binary_files_match(actual_file, expected)
示例#14
0
def load_ground_truth_from_run(model_config: SegmentationModelBase, sd_config: SurfaceDistanceConfig, subject_id: int,
                               structure: str) -> np.ndarray:
    """
    Loads the ground truth file of a single structure for a single subject from the outputs of a run.
    The file is expected at
    <outputs_folder>/<run_recovery_id>/<ground_truth_dir>/<subject_id>/<structure>.nii.gz
    :param model_config: Model config; supplies the outputs folder.
    :param sd_config: Surface distance config; supplies the run recovery ID and the ground truth directory.
    :param subject_id: ID of the given subject
    :param structure: Name of the anatomical structure
    :return: ground truth array
    :raises FileNotFoundError: If there is no file at the expected location.
    """
    subject_folder = model_config.outputs_folder / sd_config.run_recovery_id / sd_config.ground_truth_dir \
        / str(subject_id)
    ground_truth_path = subject_folder / f"{structure}.nii.gz"
    if ground_truth_path.is_file():
        return io_util.load_nifti_image(ground_truth_path).image
    raise FileNotFoundError(f"No file exists at {ground_truth_path}")
def test_load_dicom_series(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that a DICOM series can be loaded and saved as NIfTI, by checking shape and spacing of the
    NIfTI file that is read back.

    :param test_output_dirs: Test output directories.
    :return: None.
    """
    expected_shape = (3, 512, 512)
    expected_spacing = (2.5, 1.269531, 1.269531)
    nifti_file = test_output_dirs.root_dir / "test_dicom_series.nii.gz"
    load_dicom_series_and_save(dicom_series_folder, nifti_file)

    loaded = io_util.load_nifti_image(nifti_file)
    assert loaded.image.shape == expected_shape
    assert loaded.header.spacing is not None
    np.testing.assert_allclose(loaded.header.spacing, expected_spacing, rtol=0.1)
示例#16
0
def test_scale_and_unscale_image(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if an image in the CT value range can be recovered when we save dataset examples
    (undoing the effects of CT Windowing). Values outside of the window can only be recovered as the
    window boundaries.

    :param test_output_dirs: Test output directories; the NIfTI file is written there.
    """
    image_size = (5, 5, 5)
    spacing = (1, 2, 3)
    header = ImageHeader(origin=(0, 1, 0),
                         direction=(-1, 0, 0, 0, -1, 0, 0, 0, -1),
                         spacing=spacing)
    np.random.seed(0)
    # Random image values drawn from a normal distribution with mean -100 and std 100.
    image = np.random.normal(-100, 100, size=image_size)

    # Window and level define the interval [lower, upper] of raw CT values that is retained.
    window = 200
    level = -100
    half_window = window / 2
    lower = level - half_window
    upper = level + half_window

    # Expected image after a round trip through disk: values outside of the window are clamped to its
    # boundaries, then everything is cast to integers.
    image_restricted = np.clip(image, lower, upper).astype(int)

    # Apply window and level, mapping to the usual CNN input value range.
    cnn_input_range = (-1, +1)
    image_windowed = LinearTransform.transform(data=image,
                                               input_range=(lower, upper),
                                               output_range=cnn_input_range)
    args = SegmentationModelBase(
        norm_method=PhotometricNormalizationMethod.CtWindow,
        output_range=cnn_input_range,
        window=window,
        level=level,
        should_validate=False)

    file_name = test_output_dirs.create_file_or_folder_path(
        "scale_and_unscale_image.nii.gz")
    io_util.store_image_as_short_nifti(image_windowed, header, file_name, args)
    image_from_disk = io_util.load_nifti_image(file_name)
    expected_unique_values = np.unique(image_restricted).tolist()
    # noinspection PyTypeChecker
    assert_nifti_content(file_name, image_size, header,
                         expected_unique_values, np.short)
    assert np.array_equal(image_from_disk.image, image_restricted)
def test_store_as_nifti(test_output_dirs: TestOutputDirectories, image_type: Any, scale: Any, input_range: Any,
                        output_range: Any) \
        -> None:
    """
    Stores a random image via store_as_nifti, then checks the content of the file and the spacing that is
    read back.

    :param test_output_dirs: Test output directories; the NIfTI file is written there.
    :param image_type: Voxel type to store, and to load the image with.
    :param scale: If truthy, the expected content is the image after a linear transform from input_range to
        output_range.
    :param input_range: Input range passed to store_as_nifti.
    :param output_range: Output range passed to store_as_nifti.
    """
    random_image = np.random.random_sample((dim_z, dim_y, dim_x))
    spacingzyx = (1, 2, 3)
    path_image = test_output_dirs.create_file_or_folder_path(default_image_name)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacingzyx)
    io_util.store_as_nifti(random_image, header, path_image,
                           image_type, scale, input_range, output_range)

    expected_image = random_image
    if scale:
        transformed = LinearTransform.transform(data=random_image,
                                                input_range=input_range,
                                                output_range=output_range)
        expected_image = transformed.astype(image_type)  # type: ignore
    expected_values = list(np.unique(expected_image.astype(image_type)))
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         expected_image.shape, header, expected_values, image_type)

    loaded_image = io_util.load_nifti_image(path_image, image_type)
    assert loaded_image.header.spacing == spacingzyx
示例#18
0
def test_score_image_dicom_mock_run(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out only the run scoring function.

    This mocks out run_inference so that store_as_ubyte_nifti
    is tested in addition to the tests in test_score_image_dicom_mock_run_store.

    :param test_output_dirs: Test output directories.
    """
    root_dir = test_output_dirs.root_dir

    # Create a dummy model and store a checkpoint for it.
    model_config = DummyModel()
    model_config.set_output_to(root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    # Assemble the model folder that score_image will run from.
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=AzureConfig(),
                         project_root=Path(__file__).parent.parent)
    model_folder = root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    # The input is a zipped DICOM series.
    zipped_dicom_series_path = root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
    zip_known_dicom_series(zipped_dicom_series_path)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True,
        model_id="Dummy:1")

    # The mocked run_inference returns the voxels of a known segmentation file.
    known_segmentation = io_util.load_nifti_image(HNSEGMENTATION_FILE).image

    with mock.patch(
            'score.run_inference',
            return_value=known_segmentation) as mock_run_inference:
        result_zip = score_image(score_pipeline_config)
        assert_zip_file_contents(result_zip, HN_DICOM_RT_ZIPPED,
                                 model_folder)

    mock_run_inference.assert_called()
示例#19
0
def get_annotations_and_majority_vote(model_config: SegmentationModelBase, annotators: List[str], structure_name: str
                                      ) -> np.ndarray:
    """
    Load the annotation of each annotator and calculate the 'gold standard' segmentation (with majority voting).
    Annotations are read from <outputs_folder>/iov/<structure_name><annotator>.nii.gz
    :param model_config: Config; supplies the outputs folder.
    :param annotators: List of the annotator names as they appear in filepaths
    :param structure_name: Name of the anatomical structure
    :return: The result of get_majority_vote over all loaded annotations.
    """
    iov_dir = model_config.outputs_folder / "iov"
    logging.info(f"Annotators going into gold standard: {annotators}")
    segmentations = [
        load_nifti_image(iov_dir / f"{structure_name}{annotator}.nii.gz").image
        for annotator in annotators
    ]
    return get_majority_vote(segmentations)
示例#20
0
def score_image(args: ScorePipelineConfig) -> Path:
    """
    Perform model inference on a single image. By doing the following:
    1) Check that all image files exist in args.data_folder, and load them
    2) Instantiate an inference pipeline based on the model_inference.json in the model folder
    3) Store the segmentation file in the model folder
    4) Upload the segmentation to AML, unless running in an offline run context
    :param args: Scoring configuration. If args.model_folder is not set, the folder containing this file is used
        as the model folder.
    :return: The value returned by store_as_ubyte_nifti for the stored segmentation.
    :raises ValueError: If one of the image files does not exist in the data folder.
    """
    logging.getLogger().setLevel(logging.INFO)
    model_folder = Path(args.model_folder or str(Path(__file__).parent))

    run_context = Run.get_context()
    logging.info(f"Run context={run_context.id}")

    # Validate all input files before loading any of them.
    data_folder = args.data_folder
    test_images = []
    for file in args.image_files:
        full_file_path = data_folder / file
        if full_file_path.exists():
            test_images.append(full_file_path)
            continue
        # For a relative data folder, the error message also shows the absolute path.
        if data_folder.is_absolute():
            message = str(data_folder)
        else:
            message = f"{data_folder}, absolute: {data_folder.absolute()}"
        raise ValueError(
            f"File {file} does not exist in data folder {message}")

    images = [load_nifti_image(image_path) for image_path in test_images]
    inference_pipeline, config = init_from_model_inference_json(
        model_folder, args.use_gpu)
    segmentation = run_inference(images, inference_pipeline, config)

    # The segmentation is stored with the header of the first input image.
    segmentation_file_name = str(model_folder / args.result_image_name)
    result_dst = store_as_ubyte_nifti(segmentation, images[0].header,
                                      segmentation_file_name)
    if not is_offline_run_context(run_context):
        run_context.upload_file(args.result_image_name, segmentation_file_name)
    logging.info(f"Segmentation completed: {result_dst}")
    return result_dst
示例#21
0
def create_smaller_image(image_size: TupleInt3, source_image_dir: Path,
                         target_image_dir: Path, image_file_name: str) -> None:
    """
    Create a random image of the given size, with the header and value range of an existing image.
    The existing image is loaded from source_image_dir, the random image is stored under the same file name
    in target_image_dir, with voxel type short.

    :param image_size: Target image size.
    :param source_image_dir: Source image directory.
    :param target_image_dir: Target image directory.
    :param image_file_name: Common image file name.
    :return: None.
    """
    source_image = io_util.load_nifti_image(source_image_dir / image_file_name)
    voxels = source_image.image
    lowest = np.min(voxels)
    highest = np.max(voxels)

    # The upper bound of randint is exclusive, hence +1 so that the source maximum can be drawn.
    random_voxels = np.random.randint(low=lowest,
                                      high=highest + 1,
                                      size=image_size)
    target_file = target_image_dir / image_file_name
    io_util.store_as_nifti(random_voxels, source_image.header, target_file, np.short)
示例#22
0
def score_image(args: ScorePipelineConfig) -> Path:
    """
    Perform model inference on a single image. By doing the following:
    1) Copy the provided data root directory to the project root (this contains the model checkpoints and image
       to infer)
    2) Instantiate an inference pipeline based on the provided model_inference.json in the snapshot
    3) Store the segmentation file in the project root
    4) Upload the segmentation to AML, unless running in an offline run context
    :param args: Scoring configuration.
    :return: Path to the stored segmentation file.
    """
    logging.getLogger().setLevel(logging.INFO)
    project_root = Path(args.project_root)

    # Bring model and data into the project root.
    data_root = args.data_root
    copy_tree(data_root, str(project_root))
    logging.info(
        f'Copied contents of data_root: {data_root} to {project_root}')

    run_context = Run.get_context()
    logging.info(f"Run context={run_context.id}")

    # One image per test channel, read from the default data folder inside the project root.
    channel_folder = project_root / DEFAULT_DATA_FOLDER
    images = [load_nifti_image(channel_folder / channel) for channel in args.test_image_channels]
    inference_pipeline, config = init_from_model_inference_json(
        project_root, args.use_gpu)
    segmentation = run_inference(images, inference_pipeline, config)

    # The segmentation is stored with the header of the first channel.
    result_image_name = args.result_image_name
    segmentation_file_name = str(project_root / result_image_name)
    result_dst = store_as_ubyte_nifti(segmentation, images[0].header,
                                      segmentation_file_name)
    if not is_offline_run_context(run_context):
        run_context.upload_file(result_image_name, segmentation_file_name)
    logging.info(f"Segmentation completed: {result_dst}")

    return Path(result_dst)
示例#23
0
def assert_nifti_content(full_file: PathOrString, expected_shape: TupleInt3,
                         expected_header: ImageHeader,
                         expected_values: List[int],
                         expected_type: type) -> None:
    """
    Asserts that a nifti file exists, and that its image has the expected shape, voxel type, set of unique
    values, and header.
    :param full_file: The path to the file.
    :param expected_shape: The expected shape of the image in the nifti file.
    :param expected_header: the expected image header
    :param expected_values: The expected unique values in the image array, in the order returned by np.unique.
    :param expected_type: The expected type of the stored values.
    """
    file_path = Path(full_file) if isinstance(full_file, str) else full_file
    assert_file_exists(file_path)
    loaded = io_util.load_nifti_image(file_path, None)

    actual_shape = loaded.image.shape
    assert actual_shape == expected_shape, content_mismatch(actual_shape, expected_shape)

    actual_dtype = loaded.image.dtype
    assert actual_dtype == np.dtype(expected_type), content_mismatch(actual_dtype, expected_type)

    unique_values = np.unique(loaded.image).tolist()
    assert unique_values == expected_values, content_mismatch(unique_values, expected_values)

    assert loaded.header == expected_header
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Runs InnerEye.ML.model_testing.segmentation_model_test on a DummyModel and verifies the CSV
    (and image) files that are found in the results folder afterwards.
    :param test_output_dirs: The fixture in conftest.py
    :param use_partial_ground_truth: If True, some ground truth labels are removed for some test subjects.
    :param allow_partial_ground_truth: The value to assign to the model's allow_incomplete_labels flag.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    dataset_df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if not use_partial_ground_truth:
        dataset_df = dataset_df[dataset_df.subject.isin([1, 2])]
    else:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]

        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions, patients 3, 4,
        # and 5 are in the test dataset:
        # Patient 3 has one missing ground truth channel: "region"
        # Patient 4 has all ground truth channels missing: "region", "region_1"
        # Patient 5 has no missing ground truth channels.
        removed_rows = [(3, "region"), (4, "region"), (4, "region_1")]
        for subject, channel in removed_rows:
            rows_to_keep = dataset_df["subject"].ne(subject) | dataset_df["channel"].ne(channel)
            dataset_df = dataset_df[rows_to_keep]

        # The model sees the dataset with the rows removed, before it is restricted to the test subjects.
        config.dataset_data_frame = dataset_df

        dataset_df = dataset_df[dataset_df.subject.isin([3, 4, 5])]

        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']

    def build_inference_datasets() -> dict:
        # Creates the dataset for the TEST execution mode from the (possibly reduced) dataframe.
        return {
            ModelExecutionMode.TEST:
            FullImageDataset(config,
                             dataset_df,
                             full_image_sample_transforms=transform)
        }

    if use_partial_ground_truth and not allow_partial_ground_truth:
        # Creating the dataset is expected to fail because of the missing channel.
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = build_inference_datasets()  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(
            value_error.value)
        return

    # noinspection PyTypeHints
    config._datasets_for_inference = build_inference_datasets()  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(
        config,
        config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(
        execution_mode)
    # The column name is the lower-cased patient column name with a "total_" prefix, in plural form.
    lowered_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    total_num_patients_column_name = lowered_name if lowered_name.endswith("s") else lowered_name + "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(dataset_df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                column_name=total_num_patients_column_name,
                value=num_subjects,
                contains_only_value=True)
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                column_name=MetricsFileColumns.Dice.value,
                value='',
                contains_only_value=False)
        return

    aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
    assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth

    assert not csv_column_contains_value(
        csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
        column_name=MetricsFileColumns.Dice.value,
        value='',
        contains_only_value=False)

    assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    for metrics_file in (model_testing.SUBJECT_METRICS_FILE_NAME,
                         model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / metrics_file,
                                train_and_test_data_dir / metrics_file)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Each output image is expected to contain a single unique value, for both subjects.
    expected_single_values = [
        ("posterior_region.nii.gz", 137),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 117),
    ]
    subject_folders = [("001", patient1), ("002", patient2)]
    for image_file_name, single_value in expected_single_values:
        for folder_name, patient in subject_folders:
            assert_nifti_content(epoch_dir / folder_name / image_file_name,
                                 get_image_shape(patient), patient.header,
                                 [single_value], np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    overlays = [
        png for png in thumbnails_folder.glob("*.png")
        if "_region_slice_" in str(png)
    ]
    assert len(overlays) == len(dataset_df.subject.unique(
    )), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == execution_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()
def main() -> None:
    """
    Script entry point for surface distance analysis.
    Parses the runner arguments and the surface distance configuration, loads the predicted
    segmentations for the configured run mode, and for each prediction calls the plotting helpers
    to compare it against its ground truth and to show the surface distances. In IOV run mode, a
    further plot over all structures is created for each annotator.
    :raises ValueError: If no model name was given in the --model argument, or if a prediction is
    processed while the run mode is neither IOV nor OUTLIERS.
    """
    parser = create_runner_parser(SegmentationModelBase)
    parser_result = parse_args_and_add_yaml_variables(
        parser, fail_on_unknown_args=True)
    surface_distance_config = SurfaceDistanceConfig.parse_args()

    azure_config = AzureConfig(**parser_result.args)
    config_model = azure_config.model
    if config_model is None:
        raise ValueError(
            "The name of the model to train must be given in the --model argument."
        )

    model_config = ModelConfigLoader().create_model_config_from_name(
        config_model)
    model_config.apply_overrides(parser_result.overrides, should_validate=True)
    execution_mode = surface_distance_config.execution_mode

    run_mode = surface_distance_config.run_mode
    if run_mode == SurfaceDistanceRunType.IOV:
        # In IOV mode, the CT scan is read from a fixed location relative to the working directory.
        ct_path = Path(
            "outputs") / SurfaceDistanceRunType.IOV.value.lower() / "ct.nii.gz"
        ct = load_nifti_image(ct_path).image
    else:
        ct = None
    annotators = [
        annotator.strip() for annotator in surface_distance_config.annotators
    ]
    # The model is handled like one more annotator when loading predictions and collecting distances.
    extended_annotators = annotators + [surface_distance_config.model_name]

    outlier_range = surface_distance_config.outlier_range
    predictions = load_predictions(run_mode, azure_config, model_config,
                                   execution_mode, extended_annotators,
                                   outlier_range)
    segmentations = [
        load_nifti_image(Path(pred_seg.segmentation_path))
        for pred_seg in predictions
    ]
    # Shape and spacing of the first segmentation are used for all accumulators and distance computations.
    img_shape = segmentations[0].image.shape
    # transpose spacing to match image which is transposed in io_util
    voxel_spacing = segmentations[0].header.spacing[::-1]

    overall_gold_standard = np.zeros(img_shape)
    sds_for_annotator = sd_util.initialise_surface_distance_dictionary(
        extended_annotators, img_shape)

    plane = surface_distance_config.plane
    output_img_dir = Path(surface_distance_config.output_img_dir)

    # Stays None if no prediction was processed at all, which disables the summary plots below.
    subject_id: Optional[int] = None
    for prediction, pred_seg_w_header in zip(predictions, segmentations):
        subject_id = prediction.subject_id
        structure_name = prediction.structure_name
        annotator = prediction.annotator
        pred_segmentation = pred_seg_w_header.image
        if run_mode == SurfaceDistanceRunType.OUTLIERS:
            try:
                ground_truth = sd_util.load_ground_truth_from_run(
                    model_config, surface_distance_config, subject_id,
                    structure_name)
            except FileNotFoundError as e:
                # A missing ground truth file only skips this prediction.
                logging.warning(e)
                continue
        elif run_mode == SurfaceDistanceRunType.IOV:
            ground_truth = sd_util.get_annotations_and_majority_vote(
                model_config, annotators, structure_name)
        else:
            raise ValueError(
                f'Unrecognised run mode: {run_mode}. Expected either IOV or OUTLIERS'
            )

        binary_prediction_mask = multi_label_array_to_binary(
            pred_segmentation, 2)[1]
        # For comparison, plot gold standard vs predicted segmentation
        segmentation_and_groundtruth_plot(binary_prediction_mask,
                                          ground_truth,
                                          subject_id,
                                          structure_name,
                                          plane,
                                          output_img_dir,
                                          annotator=annotator)

        if run_mode == SurfaceDistanceRunType.IOV:
            overall_gold_standard += ground_truth

        # Calculate and plot surface distance
        sds_full = sd_util.calculate_surface_distances(ground_truth,
                                                       binary_prediction_mask,
                                                       list(voxel_spacing))
        surface_distance_ground_truth_plot(ct,
                                           ground_truth,
                                           sds_full,
                                           subject_id,
                                           structure_name,
                                           plane,
                                           output_img_dir,
                                           annotator=annotator)

        if annotator is not None:
            sds_for_annotator[annotator] += sds_full

    # Plot all structures SDs for each annotator
    if run_mode == SurfaceDistanceRunType.IOV and subject_id is not None and sds_for_annotator:
        # The accumulated gold standard is the same for every annotator, so it is binarised once
        # rather than once per annotator. The maximum is taken directly on the array: going via
        # np.unique gives the same value but sorts the whole volume first.
        # NOTE(review): sharing one array across plots assumes that
        # surface_distance_ground_truth_plot does not modify its ground truth argument - confirm.
        num_classes = int(np.amax(overall_gold_standard))
        binarised_gold_standard = multi_label_array_to_binary(
            overall_gold_standard, num_classes)[1:].sum(axis=0)
        for annotator, sds in sds_for_annotator.items():
            surface_distance_ground_truth_plot(ct,
                                               binarised_gold_standard,
                                               sds,
                                               subject_id,
                                               'All',
                                               plane,
                                               output_img_dir,
                                               annotator=annotator)
示例#26
0
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    """
    Runs model_testing.segmentation_model_test for a DummyModel with a single test epoch, using the
    checkpoints stored in the test data folder, and verifies the files found in the results folder.
    :param test_output_dirs: Provides the root folder that the model writes its outputs to.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    epoch = 1
    config.num_epochs = epoch
    assert config.get_test_epochs() == [epoch]
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    all_subjects_df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = all_subjects_df[all_subjects_df.subject.isin([1, 2])]
    test_dataset = FullImageDataset(config, df, full_image_sample_transforms=transform)
    # noinspection PyTypeHints
    config._datasets_for_inference = {ModelExecutionMode.TEST: test_dataset}  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    shutil.copytree(str(full_ml_test_data_path("checkpoints")), str(config.checkpoint_folder))
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=execution_mode,
                                                              checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_epoch_results_path(epoch, execution_mode)
    assert inference_results.epochs[epoch] == pytest.approx(0.66606902, abs=1e-6)

    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    for metrics_file in (model_testing.METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / metrics_file, train_and_test_data_dir / metrics_file)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Each output image is expected to contain a single unique value, for both subjects.
    expected_single_values = [
        ("posterior_region.nii.gz", 136),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 118),
    ]
    subject_folders = [("001", patient1), ("002", patient2)]
    for image_file_name, single_value in expected_single_values:
        for folder_name, patient in subject_folders:
            assert_nifti_content(epoch_dir / folder_name / image_file_name, get_image_shape(patient),
                                 patient.header,
                                 [single_value], np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    overlays = [png for png in thumbnails_folder.glob("*.png") if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == execution_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()
示例#27
0
def get_nifti_shape(full_path: PathOrString) -> TupleInt3:
    """
    Loads the Nifti file at the given path and returns the size of its image, as an (X, Y, Z) tuple.
    :param full_path: The path of the Nifti file to read.
    """
    return get_image_shape(io_util.load_nifti_image(full_path))
def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
                                  labels_to_boundary: bool) -> None:
    """
    Tests if patch sampling and producing diagnostic images works as expected.
    :param test_output_dirs: Provides the root folder that the diagnostic images are written to.
    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(2, 10, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    mask = np.ones(shape)
    labels = np.zeros((len(class_weights), ) + shape)
    if labels_to_boundary:
        # Generate foreground labels in such a way that a patch centered around a foreground pixel would
        # reach outside of the image.
        labels[1, 4:8, 3:27, 3:27] = 1
    else:
        labels[1, 4:8, 15:18, 15:18] = 1
    # Channel 0 is the complement of the foreground channel.
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    image_header = get_unit_image_header()
    sample = Sample(image=image,
                    mask=mask,
                    labels=labels,
                    metadata=PatientMetadata(patient_id='123',
                                             image_header=image_header))
    expected_folder = full_ml_test_data_path("patch_sampling")
    heatmap = visualize_random_crops(sample,
                                     config,
                                     output_folder=output_folder)
    expected_heatmap = expected_folder / ("sampled_to_boundary.npy"
                                          if labels_to_boundary else
                                          "sampled_center.npy")
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    assert np.allclose(heatmap, np.load(
        str(expected_heatmap))), "Patch sampling created a different heatmap."
    f1 = output_folder / "123_ct.nii.gz"
    assert_file_exists(f1)
    f2 = output_folder / "123_sampled_patches.nii.gz"
    assert_file_exists(f2)
    thumbnails = [
        "123_sampled_patches_dim0.png",
        "123_sampled_patches_dim1.png",
        "123_sampled_patches_dim2.png",
    ]
    for f in thumbnails:
        assert_file_exists(output_folder / f)

    expected = expected_folder / ("sampled_to_boundary.nii.gz"
                                  if labels_to_boundary else
                                  "sampled_center.nii.gz")
    # To update test results:
    # shutil.copy(str(f2), str(expected))
    expected_image = io_util.load_nifti_image(expected)
    actual_image = io_util.load_nifti_image(f2)
    # np.allclose only returns a bool, so its result has to be asserted for the comparison
    # to be able to fail the test.
    assert np.allclose(expected_image.image, actual_image.image), \
        "Patch sampling created a different Nifti image."
    if labels_to_boundary:
        for f in thumbnails:
            # Uncomment this line to update test results
            # (expected_folder / f).write_bytes((output_folder / f).read_bytes())
            if not is_running_on_azure():
                # When running on the Azure build agents, it appears that the bounding box of the images
                # is slightly different than on local runs, even with equal dpi settings.
                # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
                assert_binary_files_match(output_folder / f,
                                          expected_folder / f)
示例#29
0
def test_nii_load_image() -> None:
    """
    Loads the known Nifti file and checks that the loaded image array equals the known reference array.
    """
    loaded_voxels = io_util.load_nifti_image(known_nii_path).image
    assert np.array_equal(loaded_voxels, known_array)
示例#30
0
def test_bad_image_load_image(path: Any) -> None:
    """
    Checks that io_util.load_nifti_image raises a ValueError when called with the given path.
    :param path: The value that is passed to load_nifti_image.
    NOTE(review): presumably supplied by a pytest parametrization that is not visible here - confirm.
    """
    with pytest.raises(ValueError):
        io_util.load_nifti_image(path)