Example #1
def test_try_create_model_load_from_checkpoint_and_adjust(config: ModelConfigBase, checkpoint_path: str,
                                                          model_execution_mode: ModelExecutionMode) -> None:
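    """
    Test that ModelAndInfo creates the model, loads weights when a valid checkpoint is given, and adjusts the
    model for GPU execution (wrapping it in DataParallelModel), for missing, invalid and valid checkpoint paths.
    """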
    config.use_gpu = True

    # No checkpoint path provided
    model_and_info = ModelAndInfo(config,
                                  model_execution_mode=model_execution_mode,
                                  checkpoint_path=None)

    with pytest.raises(ValueError):
        model_and_info.model

    model_loaded = model_and_info.try_create_model_load_from_checkpoint_and_adjust()
    assert model_loaded
    assert isinstance(model_and_info.model, DataParallelModel)

    # Invalid checkpoint path provided
    model_and_info = ModelAndInfo(config,
                                  model_execution_mode=model_execution_mode,
                                  checkpoint_path=full_ml_test_data_path("non_exist.pth.tar"))
    model_loaded = model_and_info.try_create_model_load_from_checkpoint_and_adjust()
    assert not model_loaded
    # Current code assumes that even if this function returns False, the model itself was created;
    # only the checkpoint loading failed.
    assert isinstance(model_and_info.model, DataParallelModel)

    # Valid checkpoint path provided
    model_and_info = ModelAndInfo(config,
                                  model_execution_mode=model_execution_mode,
                                  checkpoint_path=full_ml_test_data_path(checkpoint_path))
    model_loaded = model_and_info.try_create_model_load_from_checkpoint_and_adjust()
    assert model_loaded
    assert isinstance(model_and_info.model, DataParallelModel)
    assert model_and_info.checkpoint_epoch == 1
def test_create_from_checkpoint_ensemble() -> None:
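    """
    Test that ScalarEnsemblePipeline.create_from_checkpoint builds an ensemble from the checkpoints that exist,
    and raises a ValueError when none of the given checkpoint paths exist.
    """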
    config = ClassificationModelForTesting()

    checkpoint_folder_non_exist = "classification_data_generated_random/checkpoints/non_exist.pth.tar"
    path_to_checkpoint_non_exist = full_ml_test_data_path(
        checkpoint_folder_non_exist)
    checkpoint_folder_exist = "classification_data_generated_random/checkpoints/1_checkpoint.pth.tar"
    path_to_checkpoint_exist = full_ml_test_data_path(checkpoint_folder_exist)

    # when none of the checkpoints exist, raise an error
    with pytest.raises(ValueError):
        paths_to_checkpoint = [path_to_checkpoint_non_exist] * 5
        ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint,
                                                      config)

    # when only some of the checkpoints exist, build the ensemble from those that do
    paths_to_checkpoint = [path_to_checkpoint_non_exist
                           ] * 3 + [path_to_checkpoint_exist] * 2
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(
        paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 2

    # when all checkpoints exist
    paths_to_checkpoint = [path_to_checkpoint_exist] * 5
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(
        paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 5
def test_get_checkpoints_to_test(
        test_output_dirs: OutputFolderForTests) -> None:
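    """
    Test that the checkpoint handler returns the expected checkpoints for testing, depending on whether local
    weights, a run recovery ID, or checkpoints from training are available.
    """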
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    config.outputs_folder.mkdir()
    manage_recovery = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)

    # Set a local_weights_path to get the checkpoint from. The model has not been trained and no run recovery is
    # provided, so the local weights should be used, ignoring any epochs to test.
    config.epochs_to_test = [1, 2]
    local_weights_path = test_output_dirs.root_dir / "exist.pth"
    stored_checkpoint = create_checkpoint_path(
        full_ml_test_data_path("checkpoints"), epoch=1)
    shutil.copyfile(str(stored_checkpoint), local_weights_path)
    config.local_weights_path = local_weights_path
    manage_recovery.discover_and_download_checkpoints_from_previous_runs()
    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 1
    assert checkpoint_and_paths[0].epoch == 0
    assert checkpoint_and_paths[0].checkpoint_paths == [
        manage_recovery.model_config.outputs_folder / WEIGHTS_FILE
    ]

    # Now set a run recovery object and set the start epoch to 1, so we get one epoch from
    # run recovery and one from the training checkpoints
    manage_recovery.azure_config.run_recovery_id = DEFAULT_RUN_RECOVERY_ID
    config.start_epoch = 1
    manage_recovery.additional_training_done()
    manage_recovery.discover_and_download_checkpoints_from_previous_runs()
    # Copy checkpoint to make it seem like training has happened
    stored_checkpoint = create_checkpoint_path(
        path=full_ml_test_data_path("checkpoints"), epoch=1)
    expected_checkpoint = create_checkpoint_path(path=config.checkpoint_folder,
                                                 epoch=2)
    shutil.copyfile(str(stored_checkpoint), str(expected_checkpoint))

    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()

    assert checkpoint_and_paths
    assert len(checkpoint_and_paths) == 2
    assert checkpoint_and_paths[0].epoch == 1
    assert checkpoint_and_paths[0].checkpoint_paths == [
        create_checkpoint_path(path=config.checkpoint_folder /
                               DEFAULT_RUN_RECOVERY_ID.split(":")[1],
                               epoch=1)
    ]
    assert checkpoint_and_paths[1].epoch == 2
    assert checkpoint_and_paths[1].checkpoint_paths == [
        create_checkpoint_path(path=config.checkpoint_folder, epoch=2)
    ]

    # This epoch does not exist
    config.epochs_to_test = [3]
    checkpoint_and_paths = manage_recovery.get_checkpoints_to_test()
    assert checkpoint_and_paths is None
Example #4
def test_create_summary(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test that summaries of CV performance per mode, and per mode per structure, look like they should.
    """
    root = Path(test_output_dirs.root_dir)
    test_file = full_ml_test_data_path("MetricsAcrossAllRuns.csv")
    df = pd.read_csv(test_file)
    file1, file2 = create_results_breakdown(df, root)
    expected1 = full_ml_test_data_path(METRICS_BY_MODE_AND_STRUCTURE_FILE)
    expected2 = full_ml_test_data_path(METRICS_BY_MODE_FILE)
    assert file1.read_text() == expected1.read_text()
    assert file2.read_text() == expected2.read_text()
def test_create_from_checkpoint_non_ensemble() -> None:
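    """
    Test that ScalarInferencePipeline.create_from_checkpoint returns None for a missing checkpoint file, and a
    valid pipeline for an existing one.
    """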
    config = ClassificationModelForTesting()

    # when the checkpoint does not exist, return None
    checkpoint_folder = "classification_data_generated_random/checkpoints/non_exist.pth.tar"
    path_to_checkpoint = full_ml_test_data_path(checkpoint_folder)
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint, config)
    assert inference_pipeline is None

    checkpoint_folder = "classification_data_generated_random/checkpoints/1_checkpoint.pth.tar"
    path_to_checkpoint = full_ml_test_data_path(checkpoint_folder)
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarInferencePipeline)
    assert inference_pipeline.epoch == 1
def test_try_create_optimizer_and_load_from_checkpoint(
        config: ModelConfigBase, checkpoint_path: str) -> None:
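    """
    Test that ModelAndInfo creates the optimizer and loads its state from the checkpoint, for missing, invalid
    and valid checkpoint paths.
    """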
    # No checkpoint path provided
    model_and_info = ModelAndInfo(config,
                                  model_execution_mode=ModelExecutionMode.TEST,
                                  is_mean_teacher=False,
                                  checkpoint_path=None)

    with pytest.raises(ValueError):
        model_and_info.optimizer

    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert model_loaded
    optimizer_loaded = model_and_info.try_create_optimizer_and_load_from_checkpoint()
    assert optimizer_loaded
    assert isinstance(model_and_info.optimizer, Optimizer)

    # Invalid checkpoint path provided
    model_and_info = ModelAndInfo(
        config,
        model_execution_mode=ModelExecutionMode.TEST,
        is_mean_teacher=False,
        checkpoint_path=full_ml_test_data_path("non_exist.pth.tar"))
    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert not model_loaded
    # Current code assumes that even if this function returns False, the model itself was created;
    # only the checkpoint loading failed.
    optimizer_loaded = model_and_info.try_create_optimizer_and_load_from_checkpoint()
    assert not optimizer_loaded
    # Current code assumes that even if this function returns False,
    # the optimizer itself was created; only the checkpoint loading failed.
    assert isinstance(model_and_info.optimizer, Optimizer)

    # Valid checkpoint path provided
    model_and_info = ModelAndInfo(
        config,
        model_execution_mode=ModelExecutionMode.TEST,
        is_mean_teacher=False,
        checkpoint_path=full_ml_test_data_path(checkpoint_path))
    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert model_loaded
    assert model_and_info.checkpoint_epoch == 1
    optimizer_loaded = model_and_info.try_create_optimizer_and_load_from_checkpoint()
    assert optimizer_loaded
    assert isinstance(model_and_info.optimizer, Optimizer)
    assert model_and_info.checkpoint_epoch == 1
def _load_and_scale_image(name: str) -> ImageWithHeader:
    image_with_header = load_nifti_image(full_ml_test_data_path(name))
    image = LinearTransform.transform(data=image_with_header.image,
                                      input_range=(0, 255),
                                      output_range=(0, 1))
    return ImageWithHeader(image=image, header=image_with_header.header)
def test_metrics_preparation_for_classification(
        perform_sub_fold_cross_validation: bool) -> None:
    """
    Test if metrics from classification models can be loaded and prepared. The files in question are checked in,
    and were downloaded from a run on AzureML.
    """
    files, plotting_config = load_result_files_for_classification(
        perform_sub_fold_cross_validation)
    downloaded_metrics = load_dataframes(files, plotting_config)
    assert ModelExecutionMode.TEST not in downloaded_metrics
    metrics = downloaded_metrics[ModelExecutionMode.VAL]
    assert metrics is not None
    expected_metrics_file = "metrics_preparation_for_sub_fold_classification_VAL.csv" \
        if perform_sub_fold_cross_validation else "metrics_preparation_for_classification_VAL.csv"
    expected_df_csv = full_ml_test_data_path(
        "plot_cross_validation") / expected_metrics_file
    metrics = metrics.sort_values(list(metrics.columns),
                                  ascending=True).reset_index(drop=True)
    # To write new test results:
    # metrics.to_csv(expected_df_csv, index=False)
    expected_df = pd.read_csv(expected_df_csv).sort_values(
        list(metrics.columns), ascending=True).reset_index(drop=True)
    pd.testing.assert_frame_equal(expected_df,
                                  metrics,
                                  check_like=True,
                                  check_dtype=False)
def test_get_dataset_splits() -> None:
    """
    Test if dataset splits are created as expected for scalar models.
    """
    model = ClassificationModelForTesting()
    model.local_dataset = full_ml_test_data_path(
        "classification_data_sub_fold_cv")
    model.number_of_cross_validation_splits = 2
    dataset_splits = model.get_dataset_splits()
    assert list(
        dataset_splits[ModelExecutionMode.TRAIN].subjectID.unique()) == [
            'S4', 'S5', 'S2', 'S10'
        ]
    assert list(dataset_splits[ModelExecutionMode.VAL].subjectID.unique()) == [
        'S1', 'S6', 'S7', 'S8'
    ]
    assert list(
        dataset_splits[ModelExecutionMode.TEST].subjectID.unique()) == [
            'S3', 'S9'
        ]
    # check if sub-folds are created as expected
    model.number_of_cross_validation_splits_per_fold = 2
    sub_fold_dataset_splits = model.get_dataset_splits()
    # the validation and the test set must be the same for parent and sub fold
    pd.testing.assert_frame_equal(dataset_splits.val,
                                  sub_fold_dataset_splits.val,
                                  check_like=True,
                                  check_dtype=False)
    pd.testing.assert_frame_equal(dataset_splits.test,
                                  sub_fold_dataset_splits.test,
                                  check_like=True,
                                  check_dtype=False)
    # make sure the training set is the expected subset of the parent
    assert list(sub_fold_dataset_splits[
        ModelExecutionMode.TRAIN].subjectID.unique()) == ['S2', 'S10']
Example #10
def load_train_and_test_data_channels(
        patient_ids: List[int],
        normalization_fn: PhotometricNormalization) -> List[Sample]:
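    """
    Load image, mask and ground truth channels for the given patients from the train_and_test_data test folder,
    and apply the given photometric normalization to each image.
    """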
    if np.any(np.asarray(patient_ids) <= 0):
        raise ValueError("patient_ids must be > 0")

    file_name = lambda k, y: full_ml_test_data_path("train_and_test_data") / f"id{k}_{y}.nii.gz"

    get_sample = lambda z: io_util.load_images_from_dataset_source(
        dataset_source=PatientDatasetSource(
            metadata=PatientMetadata(patient_id=z),
            image_channels=[file_name(z, c) for c in TEST_CHANNEL_IDS],
            mask_channel=file_name(z, TEST_MASK_ID),
            ground_truth_channels=[file_name(z, TEST_GT_ID)]))

    samples = []
    for x in patient_ids:
        sample = get_sample(x)
        sample = Sample(image=normalization_fn.transform(
            sample.image, sample.mask),
                        mask=sample.mask,
                        labels=sample.labels,
                        metadata=sample.metadata)
        samples.append(sample)

    return samples
Example #11
def test_download_or_get_local_file_2(test_output_dirs: TestOutputDirectories) -> None:
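    """
    Test that download_or_get_local_file copies files from a local run result folder into the download folder,
    preserving any sub-folder structure.
    """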
    config = PlotCrossValidationConfig(run_recovery_id=None,
                                       model_category=ModelCategory.Classification,
                                       epoch=None,
                                       should_validate=False)
    download_to_folder = Path(test_output_dirs.root_dir) / CROSSVAL_RESULTS_FOLDER
    config.outputs_directory = str(download_to_folder)
    local_results = full_ml_test_data_path("plot_cross_validation") / "HD_cfff5ceb-a227-41d6-a23c-0ebbc33b6301"
    config.local_run_results = str(local_results)
    # A file that sits in the root folder of the local_results should be downloaded into the
    # root of the download_to folder
    file1 = "dummy.txt"
    file_in_folder = config.download_or_get_local_file(None,
                                                       file1,
                                                       download_to_folder)
    assert file_in_folder is not None
    assert file_in_folder == download_to_folder / file1

    # Copying a file in a sub-folder of the local_results: The full path to the file should be
    # preserved and created in the download_to folder.
    file2 = Path("0") / "Val" / "metrics.csv"
    file_in_folder = config.download_or_get_local_file(None,
                                                       file2,
                                                       download_to_folder)
    assert file_in_folder is not None
    assert file_in_folder == download_to_folder / file2
def test_split_by_subject_ids_invalid(splits: List[List[int]]) -> None:
    df1 = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    with pytest.raises(ValueError):
        DatasetSplits.from_subject_ids(df1,
                                       train_ids=splits[0],
                                       val_ids=splits[1],
                                       test_ids=splits[2])
Example #13
def test_plot_image_and_multiple_contours(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test plotting of an image with two overlaid contours.
    """
    size = (3, 3)
    image = np.zeros(size)
    image[0, 0] = -1
    image[2, 2] = 1
    labels1 = np.zeros(size)
    labels1[1, 1] = 1
    labels2 = np.zeros(size)
    labels2[0, 0] = 1
    file_name = "image_and_multiple_contours.png"
    plot_file = Path(test_output_dirs.root_dir) / file_name
    args1 = {'colors': 'r', 'linestyles': 'dashed'}
    args2 = {'colors': 'b'}
    plotting.plot_image_and_label_contour(image, [labels1, labels2],
                                          contour_arguments=[args1, args2],
                                          plot_file_name=plot_file)
    assert plot_file.exists()
    expected = full_ml_test_data_path(file_name)
    # To update the stored results, uncomment this line:
    # expected.write_bytes(plot_file.read_bytes())
    assert file_as_bytes(plot_file) == file_as_bytes(expected)
Example #14
def create_run_result_file_list(config: PlotCrossValidationConfig, folder: str,
                                perform_sub_fold_cross_validation: bool = False) -> List[RunResultFiles]:
    """
    Creates a list of input files for cross validation analysis, from files stored inside of the test data folder.
    :param config: The overall cross validation config
    :param folder: The folder to read from, inside of test_data/plot_cross_validation.
    :param perform_sub_fold_cross_validation: If True then create input files for sub fold cross validation analysis.
    :return:
    """
    full_folder = full_ml_test_data_path("plot_cross_validation") / folder
    files: List[RunResultFiles] = []
    previous_dataset_file = None
    for split in ["0", "1", "1", "1"] if perform_sub_fold_cross_validation else ["0", "1"]:
        for mode in config.execution_modes_to_download():
            metrics_file = full_folder / split / mode.value / METRICS_FILE_NAME
            dataset_file: Optional[Path] = full_folder / split / DATASET_CSV_FILE_NAME
            if dataset_file.exists():  # type: ignore
                # Reduce the number of large checked-in files. Dataset files can be large and are usually duplicated
                # across runs, so store only a copy in split 0 and re-use it in split 1.
                previous_dataset_file = dataset_file
            else:
                dataset_file = previous_dataset_file
            if metrics_file.exists():
                file = RunResultFiles(execution_mode=mode,
                                      metrics_file=metrics_file,
                                      dataset_csv_file=dataset_file,
                                      run_recovery_id=config.run_recovery_id + "_" + split,  # type: ignore
                                      split_index=split)
                files.append(file)
    return files
def test_submit_for_inference(test_output_dirs: OutputFolderForTests) -> None:
    """
    Execute the submit_for_inference script on the model that was recently trained. This starts an AzureML job,
    and downloads the segmentation. Then check if the segmentation was actually produced.
    :return:
    """
    model = get_most_recent_model()
    image_file = fixed_paths_for_tests.full_ml_test_data_path() / "train_and_test_data" / "id1_channel1.nii.gz"
    assert image_file.exists(), f"Image file not found: {image_file}"
    settings_file = fixed_paths.SETTINGS_YAML_FILE
    assert settings_file.exists(), f"Settings file not found: {settings_file}"
    azure_config = AzureConfig.from_yaml(
        settings_file, project_root=fixed_paths.repository_root_directory())
    # Read the name of the branch from the environment, so that the inference experiment is also listed alongside
    # all other AzureML runs that belong to the current PR.
    build_branch = os.environ.get("BUILD_BRANCH", None)
    experiment_name = to_azure_friendly_string(
        build_branch) if build_branch else "model_inference"
    azure_config.get_git_information()
    args = [
        "--image_file",
        str(image_file), "--model_id", model.id, "--settings",
        str(settings_file), "--download_folder",
        str(test_output_dirs.root_dir), "--cluster", "training-nc12",
        "--experiment", experiment_name
    ]
    seg_path = test_output_dirs.root_dir / DEFAULT_RESULT_IMAGE_NAME
    assert not seg_path.exists(), f"Result file {seg_path} should not yet exist"
    submit_for_inference.main(
        args, project_root=fixed_paths.repository_root_directory())
    assert seg_path.exists(), f"Result file {seg_path} was not created"
def test_save_file(value: Any, expected: Any) -> None:
    file = full_ml_test_data_path("test.txt")
    io_util.save_lines_to_file(Path(file), value)

    assert_file_contents(file, expected)

    os.remove(str(file))
Example #17
def compare_files(actual: List[Path], expected: List[str]) -> None:
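    """
    Compare each of the actual files byte-for-byte against the expected file of the same name in the test data
    folder.
    """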
    assert len(actual) == len(expected)
    for (f, e) in zip(actual, expected):
        assert f.exists()
        full_expected = full_ml_test_data_path(e)
        assert full_expected.exists()
        assert str(f).endswith(e)
        assert file_as_bytes(f) == file_as_bytes(full_expected)
def test_split_by_institution_invalid(splits: List[float]) -> None:
    df1 = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    with pytest.raises(ValueError):
        DatasetSplits.from_institutions(df1,
                                        splits[0],
                                        splits[1],
                                        splits[2],
                                        shuffle=False)
def test_try_create_model_and_load_from_checkpoint(
        config: ModelConfigBase, checkpoint_path: str) -> None:
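    """
    Test that ModelAndInfo creates the model and loads its weights from the checkpoint, for missing, invalid
    and valid checkpoint paths.
    """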
    # No checkpoint path provided
    model_and_info = ModelAndInfo(config,
                                  model_execution_mode=ModelExecutionMode.TEST,
                                  is_mean_teacher=False,
                                  checkpoint_path=None)

    with pytest.raises(ValueError):
        model_and_info.model

    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert model_loaded
    if isinstance(config, SegmentationModelBase):
        assert isinstance(model_and_info.model, BaseModel)
    else:
        assert isinstance(model_and_info.model, DeviceAwareModule)

    # Invalid checkpoint path provided
    model_and_info = ModelAndInfo(
        config,
        model_execution_mode=ModelExecutionMode.TEST,
        is_mean_teacher=False,
        checkpoint_path=full_ml_test_data_path("non_exist.pth.tar"))
    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert not model_loaded
    # Current code assumes that even if this function returns False, the model itself was created;
    # only the checkpoint loading failed.
    if isinstance(config, SegmentationModelBase):
        assert isinstance(model_and_info.model, BaseModel)
    else:
        assert isinstance(model_and_info.model, DeviceAwareModule)

    # Valid checkpoint path provided
    model_and_info = ModelAndInfo(
        config,
        model_execution_mode=ModelExecutionMode.TEST,
        is_mean_teacher=False,
        checkpoint_path=full_ml_test_data_path(checkpoint_path))
    model_loaded = model_and_info.try_create_model_and_load_from_checkpoint()
    assert model_loaded
    if isinstance(config, SegmentationModelBase):
        assert isinstance(model_and_info.model, BaseModel)
    else:
        assert isinstance(model_and_info.model, DeviceAwareModule)
    assert model_and_info.checkpoint_epoch == 1
Example #20
def test_load_and_stack(file_path_str: str, expected_shape: Tuple) -> None:
    file_path = Path(file_path_str)
    files = [full_ml_test_data_path() / f for f in [file_path, file_path]]
    stacked = load_images_and_stack(files, load_segmentation=False)
    assert torch.is_tensor(stacked.segmentations)
    assert stacked.segmentations is not None
    assert stacked.segmentations.shape == (0,)
    assert torch.is_tensor(stacked.images)
    assert stacked.images.shape == (2,) + expected_shape
def test_create_inference_pipeline_invalid_epoch(
        config: ModelConfigBase, checkpoint_folder: str,
        test_output_dirs: TestOutputDirectories) -> None:
    config.set_output_to(test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path(checkpoint_folder)
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    # no pipeline created when checkpoint for epoch does not exist
    assert create_inference_pipeline(config, 10) is None
Example #22
def test_load_items_seq_from_dataset() -> None:
    """
    Test loading a sequence dataset with numerical and categorical features and images.
    """
    dummy_dataset = full_ml_test_data_path() / "sequence_data_for_classification" / "dataset.csv"
    df = pd.read_csv(dummy_dataset, sep=",", dtype=str)
    items: List[SequenceDataSource] = DataSourceReader[SequenceDataSource](
        data_frame=df,
        image_channels=None,
        image_file_column="IMG",
        label_channels=None,
        label_value_column="Label",
        numerical_columns=["NUM1", "NUM2", "NUM3", "NUM4"],
        sequence_column="Position").load_data_sources()
    assert len(items) == 3 * 9  # 3 subjects, 9 visits each, no missing
    assert items[0].metadata.id == "2137.00005"
    assert items[0].metadata.sequence_position == 0
    assert items[0].metadata.props["CAT2"] == "category_A"
    # One of the labels is missing; missing labels should be encoded as NaN
    assert math.isnan(items[0].label[0])
    assert items[0].channel_files == ["img_1"]
    assert str(items[0].numerical_non_image_features.tolist()) == str(
        [362.0, np.nan, np.nan, 71.0])
    assert items[8].metadata.id == "2137.00005"
    assert items[8].metadata.sequence_position == 8
    assert items[8].label.tolist() == [0.0]
    assert items[8].channel_files == ['']
    assert str(items[8].numerical_non_image_features.tolist()) == str(
        [350.0, np.nan, np.nan, 8.0])
    assert items[16].metadata.id == "2627.00001"
    assert items[16].label.tolist() == [0.0]
    assert items[16].channel_files == ["img_2"]
    assert_tensors_equal(items[16].numerical_non_image_features,
                         [217.0, 0.0, 0.01, 153.0])
    assert items[26].metadata.id == "3250.00005"
    assert items[26].metadata.sequence_position == 8
    assert_tensors_equal(items[26].label, [0.0])
    assert items[26].channel_files == ["img_11"]
    assert_tensors_equal(items[26].numerical_non_image_features,
                         [238.0, 0.0, 0.02, 84.0])

    grouped = group_samples_into_sequences(
        filter_valid_classification_data_sources_items(
            items, file_to_path_mapping=None,
            max_sequence_position_value=None))
    # There are 3 patients total, but one of them has missing measurements for all visits
    assert len(grouped) == 2
    assert grouped[0].id == "2627.00001"
    assert grouped[1].id == "3250.00005"
    # 2627.00001 has full information for weeks 0, 4, and 8
    assert len(grouped[0].items) == 3
    assert grouped[0].items[0].metadata["VISIT"] == "V1"
    assert grouped[0].items[2].metadata["VISIT"] == "VST 3"
    assert len(grouped[1].items) == 9
    assert items[16].metadata.sequence_position == 7
def _get_metrics_df(mode: ModelExecutionMode) -> pd.DataFrame:
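    """
    Load the aggregated metrics for the given execution mode from the test data folder, prefix the split names
    with the ensemble run recovery ID, and return the rows sorted by all columns.
    """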
    metrics_df = pd.read_csv(
        full_ml_test_data_path("{}_agg_splits.csv".format(mode.value)))
    # noinspection PyUnresolvedReferences
    metrics_df.split = [
        DEFAULT_ENSEMBLE_RUN_RECOVERY_ID + "_" + index
        for index in metrics_df.split.astype(str)
    ]
    return metrics_df.sort_values(list(metrics_df.columns),
                                  ascending=True).reset_index(drop=True)
Example #24
def compare_files(actual: List[Path], expected: List[str]) -> None:
    assert len(actual) == len(expected)
    for (f, e) in zip(actual, expected):
        assert f.exists()
        full_expected = full_ml_test_data_path(e)
        assert full_expected.exists()
        assert str(f).endswith(e)
        # To update the stored results, uncomment this line:
        # full_expected.write_bytes(f.read_bytes())
        assert file_as_bytes(f) == file_as_bytes(full_expected)
Example #25
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """Test to make sure the outlier file for a split is as expected"""
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
    save_outliers(test_config_ensemble, dataset_split_metrics, test_config_ensemble.outputs_directory)
    f = f"{ModelExecutionMode.VAL.value}_outliers.txt"
    assert_text_files_match(full_file=test_config_ensemble.outputs_directory / f,
                            expected_file=full_ml_test_data_path(f))
Example #26
def test_metrics_file(test_output_dirs: TestOutputDirectories) -> None:
    """Test if metrics files with Dice scores are written as expected."""
    folder = test_output_dirs.make_sub_dir("test_metrics_file")

    def new_file(suffix: str) -> str:
        file = os.path.join(folder, suffix)
        if os.path.exists(file):
            os.remove(file)
        return file

    d = MetricsPerPatientWriter()
    p1 = "Patient1"
    p2 = "Patient2"
    p3 = "Patient3"
    liver = "liver"
    kidney = "kidney"
    # Ordering for test data: For "kidney", patient 3 has the lowest score, so sorting should move that row first.
    # For "liver", patient 1 has the lowest score (0.4) and should be first.
    d.add(p1, liver, 1.0, 1.0, 0.5)
    d.add(p1, liver, 0.4, 1.0, 0.4)
    d.add(p2, liver, 0.8, 1.0, 0.3)
    d.add(p2, kidney, 0.7, 1.0, 0.2)
    d.add(p3, kidney, 0.4, 1.0, 0.1)
    metrics_file = new_file("metrics_file.csv")
    d.to_csv(Path(metrics_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
    assert_file_contents(
        metrics_file,
        "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
        "Patient3,kidney,0.400,1.000,0.100\n"
        "Patient2,kidney,0.700,1.000,0.200\n"
        "Patient1,liver,0.400,1.000,0.400\n"
        "Patient2,liver,0.800,1.000,0.300\n"
        "Patient1,liver,1.000,1.000,0.500\n")
    aggregates_file = new_file(METRICS_AGGREGATES_FILE)
    d.save_aggregates_to_csv(Path(aggregates_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
    assert_file_contents_match_exactly(
        Path(aggregates_file),
        full_ml_test_data_path() / METRICS_AGGREGATES_FILE)
    boxplot_per_structure(d.to_data_frame(),
                          column_name=MetricsFileColumns.DiceNumeric.value,
                          title="Dice score")
    boxplot1 = new_file("boxplot_2class.png")
    resize_and_save(5, 4, boxplot1)
    plt.clf()
    d.add(p1, "lung", 0.5, 2.0, 1.0)
    d.add(p1, "foo", 0.9, 2.0, 1.0)
    d.add(p1, "bar", 0.9, 2.0, 1.0)
    d.add(p1, "baz", 0.9, 2.0, 1.0)
    boxplot_per_structure(d.to_data_frame(),
                          column_name=MetricsFileColumns.DiceNumeric.value,
                          title="Dice score")
    boxplot2 = new_file("boxplot_6class.png")
    resize_and_save(5, 4, boxplot2)
Example #27
def test_nii_load_zyx(test_output_dirs: OutputFolderForTests) -> None:
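    """
    Test that a Nifti image loaded both via SimpleITK and via io_util has the expected (Z, Y, X) shape and spacing.
    """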
    expected_shape = (44, 167, 167)
    file_path = full_ml_test_data_path("patch_sampling/scan_small.nii.gz")
    image: sitk.Image = sitk.ReadImage(str(file_path))
    assert image.GetSize() == reverse_tuple_float3(expected_shape)
    img = sitk.GetArrayFromImage(image)
    assert img.shape == expected_shape
    image_header = io_util.load_nifti_image(file_path)
    assert image_header.image.shape == expected_shape
    assert image_header.header.spacing is not None
    np.testing.assert_allclose(image_header.header.spacing, (3.0, 1.0, 1.0), rtol=0.1)
Example #28
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: TestOutputDirectories) -> None:
    """Test to make sure the outlier file for a split is as expected"""
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
    save_outliers(test_config_ensemble, dataset_split_metrics, Path(test_config_ensemble.outputs_directory))
    assert_file_contents_match_exactly(full_file=Path(test_config_ensemble.outputs_directory)
                                                 / f"{ModelExecutionMode.VAL.value}_outliers.txt",
                                       expected_file=Path(
                                           full_ml_test_data_path(
                                               f"{ModelExecutionMode.VAL.value}_outliers.txt")))
def test_show_non_square_images(
        test_output_dirs: TestOutputDirectories) -> None:
    input_file = full_ml_test_data_path("patch_sampling") / "scan_small.nii.gz"
    input = load_nifti_image(input_file)
    image = input.image
    shape = image.shape
    mask = np.zeros_like(image)
    mask[shape[0] // 2, shape[1] // 2, shape[2] // 2] = 1
    for dim in range(3):
        scan_with_transparent_overlay(image,
                                      mask,
                                      dim,
                                      shape[dim] // 2,
                                      spacing=input.header.spacing)
        actual_file = Path(test_output_dirs.root_dir) / f"dim_{dim}.png"
        resize_and_save(5, 5, actual_file)
        expected = full_ml_test_data_path(
            "patch_sampling") / f"overlay_with_aspect_dim{dim}.png"
        # To update the stored results, uncomment this line:
        # expected.write_bytes(actual_file.read_bytes())
        assert_binary_files_match(actual_file, expected)
Example #30
def test_load_and_stack_with_segmentation() -> None:
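    """
    Test that loading HDF5 images with load_segmentation=True returns stacked uint8 segmentations alongside the
    float16 image tensors.
    """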
    expected_shape = (4, 5, 7)
    file_path = full_ml_test_data_path() / "hdf5_data/patient_hdf5s/4be9beed-5861-fdd2-72c2-8dd89aadc1ef.h5"
    files = [file_path, file_path]
    stacked = load_images_and_stack(files, load_segmentation=True)
    assert stacked.segmentations is not None
    assert torch.is_tensor(stacked.segmentations)
    assert stacked.segmentations.dtype == torch.uint8
    assert stacked.segmentations.shape == (2,) + expected_shape
    assert torch.is_tensor(stacked.images)
    assert stacked.images.dtype == torch.float16
    assert stacked.images.shape == (2,) + expected_shape