Example No. 1
def load_image(path: PathOrString,
               image_type: Optional[Type] = float) -> ImageWithHeader:
    """
    Loads an image from a Numpy, NIfTI, HDF5 or PNG file.
    For HDF5 files, the path must carry a suffix that describes what to load:
        For images: |<dataset_name>|<channel index>
        For binary segmentations: |<dataset_name>|<channel index>
        For multimap segmentations: |<dataset_name>|<channel index>|<multimap value>
        The HDF5 dataset is expected to have dimensions (channel, Z, Y, X).
    :param path: The path to the file.
    :param image_type: The type to which the image pixel data should be converted.
    :return: The loaded image together with its header.
    """
    SEPARATOR = '|'
    if is_nifti_file_path(path):
        return load_nifti_image(path, image_type)
    elif is_numpy_file_path(path):
        image = load_numpy_image(path, image_type)
        header = get_unit_image_header()
        return ImageWithHeader(image, header)
    elif SEPARATOR in str(path):
        hdf5_path_parts = str(path).split(SEPARATOR)
        if len(hdf5_path_parts) == 4:
            # segmentation multimap
            h5_path = hdf5_path_parts[0]
            dataset = hdf5_path_parts[1]
            channel = int(hdf5_path_parts[2])
            segmentation_id = int(hdf5_path_parts[3])
            image = load_hdf5_dataset_from_file(
                Path(h5_path),
                dataset)[channel] == segmentation_id  # create mask
            header = get_unit_image_header()
            return ImageWithHeader(image, header)
        elif len(hdf5_path_parts) == 3:
            h5_path = hdf5_path_parts[0]
            dataset = hdf5_path_parts[1]
            channel = int(hdf5_path_parts[2])
            image = load_hdf5_dataset_from_file(Path(h5_path),
                                                dataset)[channel]
            header = get_unit_image_header()
            return ImageWithHeader(image, header)
    elif is_png(path):
        import imageio
        image = imageio.imread(path).astype(float)
        header = get_unit_image_header()
        return ImageWithHeader(image, header)
    raise ValueError(f"Invalid file type {path}")
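A short usage sketch for the path formats accepted above. The file names are hypothetical, and the import location is an assumption (in InnerEye-DeepLearning, load_image is expected to live in InnerEye.ML.utils.io_util):

from pathlib import Path
# Hypothetical import path and file names, for illustration only.
from InnerEye.ML.utils.io_util import load_image

# NIfTI: image data plus the header stored in the file
ct = load_image(Path("data/patient1_ct.nii.gz"))
# Numpy: image from the .npy file, combined with a unit header
npy = load_image(Path("data/patient1_ct.npy"))
# HDF5, single channel: <file>|<dataset_name>|<channel index>
h5_channel = load_image("data/patient1.h5|volume|0")
# HDF5, multimap segmentation: <file>|<dataset_name>|<channel index>|<multimap value>
# produces a boolean mask where the stored dataset equals the multimap value
h5_mask = load_image("data/patient1.h5|segmentation|0|2")
print(ct.image.shape, h5_mask.image.dtype)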
def test_register_and_score_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected when used on a recently created
    model. This test is run after training an ensemble run in AzureML. It starts "submit_for_inference" via
    Popen. The inference run here is on a 2-channel model, whereas test_submit_for_inference works with a 1-channel
    model.
    """
    azureml_model = get_most_recent_model(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    assert azureml_model is not None
    assert PYTHON_ENVIRONMENT_NAME in azureml_model.tags, "Environment name not present in model tags"
    # download the registered model and test that we can run the score pipeline on it
    model_root = Path(azureml_model.download(str(test_output_dirs.root_dir)))
    # The model needs to contain score.py at the root, the (merged) environment definition,
    # and the inference config.
    expected_files = [
        *fixed_paths.SCRIPTS_AT_ROOT,
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
    ]
    for expected_file in expected_files:
        assert (model_root / expected_file).is_file(), f"File {expected_file} missing"
    checkpoint_folder = model_root / CHECKPOINT_FOLDER
    assert checkpoint_folder.is_dir()
    checkpoints = list(checkpoint_folder.rglob("*"))
    assert len(checkpoints) >= 1, "There must be at least 1 checkpoint"

    # create a dummy datastore to store the image data
    test_datastore = test_output_dirs.root_dir / "test_datastore"
    # move test data into the data folder to simulate an actual run
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
    data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
    data_root.mkdir(parents=True)
    for f in img_files:
        shutil.copy(str(train_and_test_data_dir / f), str(data_root))

    # run score pipeline as a separate process
    python_executable = sys.executable
    return_code1, stdout1 = spawn_and_monitor_subprocess(process=python_executable,
                                                         args=["--version"])
    assert return_code1 == 0
    print(f"Executing Python version {stdout1[0]}")
    return_code, stdout2 = spawn_and_monitor_subprocess(process=python_executable, args=[
        str(model_root / fixed_paths.SCORE_SCRIPT),
        f"--data_folder={str(data_root)}",
        f"--image_files={img_files[0]},{img_files[1]}",
        "--use_gpu=False"])

    # check that the process completed as expected
    assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
    expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
    assert expected_segmentation_path.exists(), f"Result file not found: {expected_segmentation_path}"

    # sanity check the resulting segmentation
    expected_shape = get_nifti_shape(train_and_test_data_dir / img_files[0])
    image_header = get_unit_image_header()
    assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [3], np.ubyte)
Example No. 3
def test_store_inference_results(
        test_output_dirs: OutputFolderForTests) -> None:
    np.random.seed(0)
    num_classes = 2
    posterior = torch.nn.functional.softmax(torch.from_numpy(
        np.random.random_sample((num_classes, dim_z, dim_y, dim_x))),
                                            dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == (dim_z, dim_y, dim_x)

    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))
    spacing = (2.0, 2.0, 2.0)
    header = get_unit_image_header(spacing=spacing)
    inference_result = InferencePipeline.Result(epoch=1,
                                                patient_id=12,
                                                posteriors=posterior,
                                                segmentation=segmentation,
                                                voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)

    assert test_config.class_and_index_with_background() == {
        "background": 0,
        "region": 1
    }

    results_folder = test_output_dirs.root_dir
    store_inference_results(inference_result, test_config,
                            Path(results_folder), header)

    assert_nifti_content(
        results_folder / "012" / "posterior_background.nii.gz",
        segmentation.shape, header, list(posterior0), np.ubyte)

    assert_nifti_content(results_folder / "012" / "posterior_region.nii.gz",
                         segmentation.shape, header, list(posterior1),
                         np.ubyte)

    assert_nifti_content(results_folder / "012" / "background.nii.gz",
                         segmentation.shape, header, list([0, 1]), np.ubyte)

    assert_nifti_content(results_folder / "012" / "region.nii.gz",
                         segmentation.shape, header, list([0, 1]), np.ubyte)

    assert_nifti_content(results_folder / "012" / DEFAULT_RESULT_IMAGE_NAME,
                         segmentation.shape, header,
                         list(np.unique(segmentation)), np.ubyte)

    assert_nifti_content(results_folder / "012" / "uncertainty.nii.gz",
                         inference_result.uncertainty.shape, header,
                         list([248, 249, 253, 254]), np.ubyte)
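The expected byte values in these assertions come from to_unique_bytes. A minimal sketch of what that helper is assumed to compute (scale values from the given input range to the 0..255 byte range and collect the distinct bytes); the real helper may differ in rounding details:

import numpy as np

def to_unique_bytes_sketch(image: np.ndarray, input_range=(0.0, 1.0)) -> np.ndarray:
    # Linearly map input_range to [0, 255], cast to unsigned byte, and return the
    # sorted unique byte values. Assumed behaviour, not the InnerEye implementation.
    lo, hi = input_range
    scaled = (image - lo) / (hi - lo) * 255
    return np.unique(scaled.astype(np.ubyte))

posterior_like = np.random.random_sample((4, 4, 4))
print(to_unique_bytes_sketch(posterior_like))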
def test_register_and_score_model(is_ensemble: bool,
                                  dataset_expected_spacing_xyz: Any,
                                  model_outside_package: bool,
                                  test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    ws = get_default_workspace()
    # Get an existing config as template
    loader = get_model_loader("Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage" if model_outside_package else "BasicModel2Epochs"
    )
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    # copy checkpoints into the outputs (simulating a run)
    stored_checkpoints = full_ml_test_data_path(os.path.join("train_and_test_data", "checkpoints"))
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    paths = [config.checkpoint_folder / "1_checkpoint.pth.tar"]
    checkpoints = paths * 2 if is_ensemble else paths
    model = None
    model_path = None
    # Mock the repository root so that source files are taken from the current directory:
    # score.py and python_wrapper.py cannot be moved inside the InnerEye package, which would be the
    # only code available if these tests are run against the installed package.
    with mock.patch('InnerEye.Common.fixed_paths.repository_root_directory',
                    return_value=tests_root_directory().parent):
        try:
            tags = {"model_name": config.model_name}
            azure_config = get_default_azure_config()
            if model_outside_package:
                azure_config.extra_code_directory = "Tests"  # contains DummyModel
            deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(cfg.model_name), azure_cfg.docker_shm_size)
            ml_runner = MLRunner(config, azure_config, model_deployment_hook=deployment_hook)
            model, deployment_path, deployment_details = ml_runner.register_segmentation_model(
                workspace=ws,
                tags=tags,
                best_epoch=0,
                best_epoch_dice=0,
                checkpoint_paths=checkpoints,
                model_proc=ModelProcessing.DEFAULT)
            assert model is not None
            model_path = Path(model.get_model_path(model.name, model.version, ws))
            assert (model_path / fixed_paths.ENVIRONMENT_YAML_FILE_NAME).exists()
            assert (model_path / Path("InnerEye/ML/runner.py")).exists()
            assert deployment_path == Path(config.model_name)
            assert deployment_details == azure_config.docker_shm_size

            # move test data into the data folder to simulate an actual run
            train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

            img_channel_1_name = "id1_channel1.nii.gz"
            img_channel_1_path = train_and_test_data_dir / img_channel_1_name
            img_channel_2_name = "id1_channel2.nii.gz"
            img_channel_2_path = train_and_test_data_dir / img_channel_2_name

            # download the registered model and test that we can run the score pipeline on it
            model_root = Path(model.download(str(test_output_dirs.root_dir)))
            # create a dummy datastore to store model checkpoints and image data
            # this simulates the code snapshot being executed in a real run
            test_datastore = test_output_dirs.root_dir / "test_datastore"
            shutil.move(
                str(model_root / "test_outputs"),
                str(test_datastore / RELATIVE_TEST_OUTPUTS_PATH)
            )
            data_root = test_datastore / DEFAULT_DATA_FOLDER
            os.makedirs(data_root)
            shutil.copy(str(img_channel_1_path), data_root)
            shutil.copy(str(img_channel_2_path), data_root)

            # run score pipeline as a separate process using the python_wrapper.py code to simulate a real run
            return_code = SubprocessConfig(process="python", args=[
                str(model_root / "python_wrapper.py"),
                "--spawnprocess=python",
                str(model_root / "score.py"),
                f"--data-folder={str(test_datastore)}",
                f"--test_image_channels={img_channel_1_name},{img_channel_2_name}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

            # check that the process completed as expected
            assert return_code == 0
            expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
            assert expected_segmentation_path.exists()

            # sanity check the resulting segmentation
            expected_shape = get_nifti_shape(img_channel_1_path)
            image_header = get_unit_image_header()
            assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)

        finally:
            # delete the registered model, and any downloaded artifacts
            shutil.rmtree(test_output_dirs.root_dir)
            if model and model_path:
                model.delete()
                shutil.rmtree(model_path)
def test_visualize_patch_sampling(test_output_dirs: TestOutputDirectories,
                                  labels_to_boundary: bool) -> None:
    """
    Tests if patch sampling and the production of diagnostic images work as expected.
    :param test_output_dirs: Fixture providing a temporary folder for the test outputs.
    :param labels_to_boundary: If true, the ground truth labels are placed close to the image boundary, so that
    crops have to be adjusted inwards. If false, ground truth labels are all far from the image boundaries.
    """
    set_random_seed(0)
    shape = (10, 30, 30)
    foreground_classes = ["fg"]
    class_weights = equally_weighted_classes(foreground_classes)
    config = SegmentationModelBase(should_validate=False,
                                   crop_size=(2, 10, 10),
                                   class_weights=class_weights)
    image = np.random.rand(1, *shape).astype(np.float32) * 1000
    mask = np.ones(shape)
    labels = np.zeros((len(class_weights), ) + shape)
    if labels_to_boundary:
        # Generate foreground labels in such a way that a patch centered around a foreground pixel would
        # reach outside of the image.
        labels[1, 4:8, 3:27, 3:27] = 1
    else:
        labels[1, 4:8, 15:18, 15:18] = 1
    labels[0] = 1 - labels[1]
    output_folder = Path(test_output_dirs.root_dir)
    image_header = get_unit_image_header()
    sample = Sample(image=image,
                    mask=mask,
                    labels=labels,
                    metadata=PatientMetadata(patient_id='123',
                                             image_header=image_header))
    expected_folder = full_ml_test_data_path("patch_sampling")
    heatmap = visualize_random_crops(sample,
                                     config,
                                     output_folder=output_folder)
    expected_heatmap = expected_folder / ("sampled_to_boundary.npy"
                                          if labels_to_boundary else
                                          "sampled_center.npy")
    # To update the stored results, uncomment this line:
    # np.save(str(expected_heatmap), heatmap)
    assert np.allclose(heatmap, np.load(
        str(expected_heatmap))), "Patch sampling created a different heatmap."
    f1 = output_folder / "123_ct.nii.gz"
    assert_file_exists(f1)
    f2 = output_folder / "123_sampled_patches.nii.gz"
    assert_file_exists(f2)
    thumbnails = [
        "123_sampled_patches_dim0.png",
        "123_sampled_patches_dim1.png",
        "123_sampled_patches_dim2.png",
    ]
    for f in thumbnails:
        assert_file_exists(output_folder / f)

    expected = expected_folder / ("sampled_to_boundary.nii.gz"
                                  if labels_to_boundary else
                                  "sampled_center.nii.gz")
    # To update test results:
    # shutil.copy(str(f2), str(expected))
    expected_image = io_util.load_nifti_image(expected)
    actual_image = io_util.load_nifti_image(f2)
    assert np.allclose(expected_image.image, actual_image.image), \
        "Sampled patches image does not match the stored expected image."
    if labels_to_boundary:
        for f in thumbnails:
            # Uncomment this line to update test results
            # (expected_folder / f).write_bytes((output_folder / f).read_bytes())
            if not is_running_on_azure():
                # When running on the Azure build agents, it appears that the bounding box of the images
                # is slightly different than on local runs, even with equal dpi settings.
                # Not able to figure out how to make the run results consistent, hence disable in cloud runs.
                assert_binary_files_match(output_folder / f,
                                          expected_folder / f)
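The labels_to_boundary case exercises the crop adjustment mentioned in the docstring. A self-contained sketch of that idea (not the InnerEye implementation): if a patch centred on a foreground voxel would reach outside the image, the start index is clamped so that the whole patch stays in bounds.

def clamp_crop_start(center: int, crop: int, size: int) -> int:
    # Naive centring would start at center - crop // 2; clamp so that a patch of
    # length `crop` fits inside an axis of length `size`.
    start = center - crop // 2
    return max(0, min(start, size - crop))

print(clamp_crop_start(center=4, crop=10, size=30))   # 0, shifted inwards from -1
print(clamp_crop_start(center=27, crop=10, size=30))  # 20, shifted inwards from 22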
def test_register_and_score_model(
        is_ensemble: bool, dataset_expected_spacing_xyz: Any,
        model_outside_package: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    # We are creating checkpoints on the fly in this test, writing a randomly initialized model.
    set_random_seed(0)
    # Get an existing config as template
    loader = get_model_loader(
        "Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage"
        if model_outside_package else "BasicModel2Epochs")
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    checkpoints_absolute = []
    model_and_info = ModelAndInfo(
        config=config, model_execution_mode=ModelExecutionMode.TRAIN)
    model_and_info.create_model()
    model_and_info.create_optimizer()
    checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=10))
    if is_ensemble:
        checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=20))
    checkpoints_relative = [
        f.relative_to(config.checkpoint_folder) for f in checkpoints_absolute
    ]
    azureml_model = None
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    # Double-check that we are at the right place, by testing for a file that would quite certainly not be found
    # somewhere else
    assert (project_root / fixed_paths.SCORE_SCRIPT).is_file()
    try:
        azure_config = get_default_azure_config()
        if model_outside_package:
            azure_config.extra_code_directory = "Tests"  # contains BasicModel2EpochsOutsidePackage
        deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(
            cfg.model_name), azure_cfg.docker_shm_size)
        ml_runner = MLRunner(config,
                             azure_config,
                             project_root=project_root,
                             model_deployment_hook=deployment_hook)
        registration_result = ml_runner.register_segmentation_model(
            model_description="",
            checkpoint_paths=checkpoints_absolute,
            model_proc=ModelProcessing.DEFAULT)
        assert registration_result is not None
        azureml_model, deployment_result = registration_result
        assert azureml_model is not None
        assert deployment_result == (Path(config.model_name),
                                     azure_config.docker_shm_size)

        # download the registered model and test that we can run the score pipeline on it
        model_root = Path(
            azureml_model.download(str(test_output_dirs.root_dir)))
        # The model needs to contain score.py at the root, the (merged) environment definition,
        # and the inference config.
        expected_files = [
            *fixed_paths.SCRIPTS_AT_ROOT,
            fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
            fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
            "InnerEye/ML/runner.py",
        ]
        # All checkpoints go into their own folder
        expected_files.extend(
            str(Path(CHECKPOINT_FOLDER) / c) for c in checkpoints_relative)
        for expected_file in expected_files:
            assert (model_root /
                    expected_file).is_file(), f"File {expected_file} missing"

        # create a dummy datastore to store the image data
        test_datastore = test_output_dirs.root_dir / "test_datastore"
        # move test data into the data folder to simulate an actual run
        train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
        img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
        data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
        data_root.mkdir(parents=True)
        for f in img_files:
            shutil.copy(str(train_and_test_data_dir / f), str(data_root))

        # run score pipeline as a separate process
        python_executable = sys.executable
        return_code1, stdout1 = SubprocessConfig(
            process=python_executable,
            args=["--version"]).spawn_and_monitor_subprocess()
        assert return_code1 == 0
        print(f"Executing Python version {stdout1[0]}")
        return_code, stdout2 = SubprocessConfig(
            process=python_executable,
            args=[
                str(model_root / fixed_paths.SCORE_SCRIPT),
                f"--data_folder={str(data_root)}",
                f"--image_files={img_files[0]},{img_files[1]}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

        # check that the process completed as expected
        assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
        expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
        assert expected_segmentation_path.exists(), \
            f"Result file not found: {expected_segmentation_path}"

        # sanity check the resulting segmentation
        expected_shape = get_nifti_shape(train_and_test_data_dir /
                                         img_files[0])
        image_header = get_unit_image_header()
        assert_nifti_content(str(expected_segmentation_path), expected_shape,
                             image_header, [3], np.ubyte)

    finally:
        # delete the registered model
        if azureml_model:
            azureml_model.delete()
Example No. 7
def visualize_random_crops(sample: Sample, config: SegmentationModelBase,
                           output_folder: Path) -> np.ndarray:
    """
    Simulate the effect of sampling random crops (as is done for training segmentation models), and store the results
    as a Nifti heatmap and as 3 axial/sagittal/coronal slices. The heatmap and the slices are stored in the given
    output folder, with filenames that contain the patient ID as the prefix.
    :param sample: The patient information from the dataset, with scans and ground truth labels.
    :param config: The model configuration.
    :param output_folder: The folder into which the heatmap and thumbnails should be written.
    :return: A numpy array that has the same size as the image, containing how often each voxel was contained in
    one of the sampled crops.
    """
    output_folder.mkdir(exist_ok=True, parents=True)
    sample = CroppingDataset.create_possibly_padded_sample_for_cropping(
        sample=sample,
        crop_size=config.crop_size,
        padding_mode=config.padding_mode)
    logging.info(f"Processing sample: {sample.patient_id}")
    # Exhaustively sample with random crop function
    image_channel0 = sample.image[0]
    heatmap = np.zeros(image_channel0.shape, dtype=np.uint16)
    # Number of repeats should fit into the range of UInt16, because we will later save the heatmap as an integer
    # Nifti file of that datatype.
    repeats = 200
    for _ in range(repeats):
        slicers, _ = augmentation.slicers_for_random_crop(
            sample=sample,
            crop_size=config.crop_size,
            class_weights=config.class_weights)
        heatmap[slicers[0], slicers[1], slicers[2]] += 1
    is_3dim = heatmap.shape[0] > 1
    header = sample.metadata.image_header
    if not header:
        logging.warning(
            f"No image header found for patient {sample.patient_id}. Using default header."
        )
        header = get_unit_image_header()
    if is_3dim:
        ct_output_name = str(output_folder / f"{sample.patient_id}_ct.nii.gz")
        heatmap_output_name = str(
            output_folder / f"{sample.patient_id}_sampled_patches.nii.gz")
        io_util.store_as_nifti(image=heatmap,
                               header=header,
                               file_name=heatmap_output_name,
                               image_type=heatmap.dtype,
                               scale=False)
        io_util.store_as_nifti(image=image_channel0,
                               header=header,
                               file_name=ct_output_name,
                               image_type=sample.image.dtype,
                               scale=False)
    heatmap_scaled = heatmap.astype(dtype=float) / heatmap.max()
    # If the incoming image is effectively a 2D image with degenerate Z dimension, then only plot a single
    # axial thumbnail. Otherwise, plot thumbnails for all 3 dimensions.
    dimensions = list(range(3)) if is_3dim else [0]
    # Center the 3 thumbnails at one of the points where the heatmap attains a maximum. This should ensure that
    # the thumbnails are in an area where many of the organs of interest are located.
    max_heatmap_index = np.unravel_index(
        heatmap.argmax(), heatmap.shape) if is_3dim else (0, 0, 0)
    for dimension in dimensions:
        plt.clf()
        scan_with_transparent_overlay(
            scan=image_channel0,
            overlay=heatmap_scaled,
            dimension=dimension,
            position=max_heatmap_index[dimension] if is_3dim else 0,
            spacing=header.spacing)
        # Construct a filename that has a dimension suffix if we are generating 3 of them. For 2dim images, skip
        # the suffix.
        thumbnail = f"{sample.patient_id}_sampled_patches"
        if is_3dim:
            thumbnail += f"_dim{dimension}"
        thumbnail += ".png"
        resize_and_save(width_inch=5,
                        height_inch=5,
                        filename=output_folder / thumbnail)
    return heatmap
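The heatmap accumulation above can be illustrated with a tiny standalone sketch (pure NumPy, uniform random crops instead of the class-weighted sampler used by InnerEye): each crop increments the counters of the voxels it covers, so heatmap / repeats approximates how likely a voxel is to end up in a training patch.

import numpy as np

image_shape = (4, 16, 16)
crop = (2, 8, 8)
repeats = 200
heatmap = np.zeros(image_shape, dtype=np.uint16)
rng = np.random.default_rng(0)
for _ in range(repeats):
    # Pick a random valid start per axis, then increment every voxel inside the crop.
    starts = [int(rng.integers(0, s - c + 1)) for s, c in zip(image_shape, crop)]
    slicers = tuple(slice(s, s + c) for s, c in zip(starts, crop))
    heatmap[slicers] += 1
print(heatmap.max(), heatmap.min())  # central voxels are covered far more often than corners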