Example #1
def init_from_model_inference_json(
    model_folder: Path,
    use_gpu: bool = True
) -> Tuple[FullImageInferencePipelineBase, SegmentationModelBase]:
    """
    Loads the config and inference pipeline from the current directory using fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
    :return: Tuple[InferencePipeline, Config]
    """
    logging.info('Python version: ' + sys.version)
    path_to_model_inference_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
    logging.info(
        f'path_to_model_inference_config: {path_to_model_inference_config}')
    model_inference_config = read_model_inference_config(
        path_to_model_inference_config)
    logging.info(f'model_inference_config: {model_inference_config}')
    full_path_to_checkpoints = [
        model_folder / x for x in model_inference_config.checkpoint_paths
    ]
    logging.info(f'full_path_to_checkpoints: {full_path_to_checkpoints}')
    loader = ModelConfigLoader(
        model_configs_namespace=model_inference_config.model_configs_namespace)
    model_config = loader.create_model_config_from_name(
        model_name=model_inference_config.model_name)
    return create_inference_pipeline(model_config, full_path_to_checkpoints,
                                     use_gpu)
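A minimal usage sketch for the function above; the folder name is illustrative, and the only assumption is that it contains the inference JSON plus the checkpoints the JSON references:

from pathlib import Path

# "downloaded_model" is a hypothetical folder holding the inference JSON and checkpoints.
inference_pipeline, model_config = init_from_model_inference_json(
    Path("downloaded_model"), use_gpu=False)
print(f"Loaded model: {model_config.model_name}")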
Example #2
def test_registered_model_file_structure_and_instantiate(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Downloads the model that was built in the most recent run, and checks if its file structure is as expected.
    """
    fallback_run_id_for_local_execution = FALLBACK_SINGLE_RUN
    model = get_most_recent_model(
        fallback_run_id_for_local_execution=fallback_run_id_for_local_execution
    )
    downloaded_folder = Path(model.download(str(test_output_dirs.root_dir)))
    print(f"Model was downloaded to folder {downloaded_folder}")
    expected_files = [
        *fixed_paths.SCRIPTS_AT_ROOT,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/config.py",
        "InnerEye/ML/metrics.py",
        "InnerEye/ML/runner.py",
    ]
    print("Downloaded model contains these files:")
    for actual_file in downloaded_folder.rglob("*"):
        print("  " + str(actual_file.relative_to(downloaded_folder)))
    missing = []
    for expected_file in expected_files:
        full_path = downloaded_folder / expected_file
        if not full_path.is_file():
            missing.append(expected_file)
    if missing:
        print("Missing files:")
        for m in missing:
            print(m)
        pytest.fail(
            f"{len(missing)} files in the registered model are missing: {missing[:5]}"
        )

    model_inference_config = read_model_inference_config(
        downloaded_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME)
    tags = get_most_recent_run(
        fallback_run_id_for_local_execution=fallback_run_id_for_local_execution
    ).get_tags()
    model_name = tags["model_name"]
    assert model_inference_config.model_name == model_name
    assert model_inference_config.model_configs_namespace.startswith(
        "InnerEye.ML.configs.")
    assert model_inference_config.model_configs_namespace.endswith(model_name)
    loader = ModelConfigLoader(
        model_configs_namespace=model_inference_config.model_configs_namespace)
    model_config = loader.create_model_config_from_name(
        model_name=model_inference_config.model_name)
    assert type(model_config).__name__ == model_inference_config.model_name
Example #3
def test_run_ml_with_classification_model(
        test_output_dirs: OutputFolderForTests,
        number_of_offline_cross_validation_splits: int,
        model_name: str) -> None:
    """
    Test training and testing of classification models, when it is started together via run_ml.
    """
    logging_to_stdout()
    azure_config = get_default_azure_config()
    azure_config.train = True
    config: ScalarModelBase = ModelConfigLoader(
    ).create_model_config_from_name(model_name)
    config.number_of_cross_validation_splits = number_of_offline_cross_validation_splits
    config.set_output_to(test_output_dirs.root_dir)
    # Trying to run DDP from the test suite hangs, hence restrict to single GPU.
    config.max_num_gpus = 1
    MLRunner(config, azure_config=azure_config).run()
    _check_offline_cross_validation_output_files(config)

    if config.perform_cross_validation:
        # Test that the result files can be correctly picked up by the cross validation routine.
        # For that, we point the downloader to the local results folder. The core download method
        # recognizes run_recovery_id == None as the signal to read from the local_run_results folder.
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        # One file for VAL, one for TRAIN and one for TEST for each child run
        assert len(result_files
                   ) == config.get_total_number_of_cross_validation_runs() * 3
        for file in result_files:
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()
Example #4
def create_runner_parser(
        model_config_class: Optional[type] = None) -> argparse.ArgumentParser:
    """
    Creates a commandline parser that understands all arguments needed for running a script in Azure,
    plus all arguments for the given class. The class must be a subclass of GenericConfig.
    :param model_config_class: A class that contains the model-specific parameters.
    :return: An instance of ArgumentParser.
    """
    parser = AzureConfig.create_argparser()
    ModelConfigLoader.add_args(parser)
    if model_config_class is not None:
        if not issubclass(model_config_class, GenericConfig):
            raise ValueError(
                f"The given class must be a subclass of GenericConfig, but got: {model_config_class}"
            )
        model_config_class.add_args(parser)

    return parser
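A hedged usage sketch for create_runner_parser. The --model flag is the one exercised in Example #7; it is assumed here to be contributed by the AzureConfig argparser:

# SegmentationModelBase is a GenericConfig subclass, as the check above requires.
parser = create_runner_parser(SegmentationModelBase)
# Parse an illustrative commandline.
parsed = parser.parse_args(["--model", "BasicModel2Epochs"])
print(parsed.model)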
Example #5
    def parse_and_load_model(self) -> ParserResult:
        """
        Parses the command line arguments, and creates configuration objects for the model itself, and for the
        Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
        parser output from parsing the model commandline arguments.
        If no "model" argument is provided on the commandline, self.model_config will be set to None, and the return
        value is None.
        """
        # Create a parser that will understand only the args we need for an AzureConfig
        parser1 = create_runner_parser()
        parser_result = parse_args_and_add_yaml_variables(parser1,
                                                          yaml_config_file=self.yaml_config_file,
                                                          project_root=self.project_root,
                                                          fail_on_unknown_args=False)
        azure_config = AzureConfig(**parser_result.args)
        azure_config.project_root = self.project_root
        self.azure_config = azure_config
        self.model_config = None
        if not azure_config.model:
            raise ValueError("Parameter 'model' needs to be set to tell InnerEye which model to run.")
        model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
        # Create the model as per the "model" commandline option. This can return either a built-in config
        # of type DeepLearningConfig, or a LightningContainer.
        config_or_container = model_config_loader.create_model_config_from_name(model_name=azure_config.model)

        def parse_overrides_and_apply(c: object, previous_parser_result: ParserResult) -> ParserResult:
            assert isinstance(c, GenericConfig)
            parser = type(c).create_argparser()
            # For each parser, feed in the unknown settings from the previous parser. All commandline args should
            # be consumed by name, hence fail if there is something that is still unknown.
            parser_result = parse_arguments(parser,
                                            settings_from_yaml=previous_parser_result.unknown_settings_from_yaml,
                                            args=previous_parser_result.unknown,
                                            fail_on_unknown_args=True)
            # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
            c.apply_overrides(parser_result.known_settings_from_yaml)
            c.apply_overrides(parser_result.overrides)
            c.validate()
            return parser_result

        # Now create a parser that understands overrides at model/container level.
        parser_result = parse_overrides_and_apply(config_or_container, parser_result)

        if isinstance(config_or_container, LightningContainer):
            self.lightning_container = config_or_container
        elif isinstance(config_or_container, ModelConfigBase):
            # Built-in InnerEye models use a fake container
            self.model_config = config_or_container
            self.lightning_container = InnerEyeContainer(config_or_container)
        else:
            raise ValueError(f"Don't know how to handle a loaded configuration of type {type(config_or_container)}")
        if azure_config.extra_code_directory:
            exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
            logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
        else:
            logging.info("extra_code_directory is unset")
        return parser_result
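The two-stage pattern above (parse the Azure-level arguments leniently, then require the model-level parser to consume every leftover by name) can be sketched with plain argparse, with no InnerEye helpers assumed:

import argparse

# Stage 1: Azure-level flags; unknown arguments are tolerated and carried forward.
azure_parser = argparse.ArgumentParser()
azure_parser.add_argument("--model", type=str)
azure_args, leftover = azure_parser.parse_known_args(
    ["--model", "Dummy", "--num_epochs", "6"])

# Stage 2: model-level flags; now everything must be consumed by name, mirroring
# fail_on_unknown_args=True in parse_overrides_and_apply.
model_parser = argparse.ArgumentParser()
model_parser.add_argument("--num_epochs", type=int)
model_args = model_parser.parse_args(leftover)
assert model_args.num_epochs == 6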
Example #6
def find_models() -> List[str]:
    """
    Lists all Python files in the configs folder. Each of them is assumed to contain one model config.
    :return: list of models
    """
    path = namespace_to_path(ModelConfigLoader.get_default_search_module())
    folders = [path / "segmentation", path / "classification", path / "regression"]
    names = [str(f.stem) for folder in folders for f in folder.glob("*.py") if folder.exists()]
    return [name for name in names if
            not (name.endswith("Base") or name.endswith("Paper")) and not name.startswith("__")]
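One plausible consumer of find_models, consistent with Example #9 below, is a test parametrized over every discovered name. The decorator wiring here is an assumption, not taken from the source:

import pytest

@pytest.mark.parametrize("model_name", find_models())
def test_all_discovered_configs_load(model_name: str) -> None:
    # Assumed wiring: every discovered config file yields a loadable config.
    config = ModelConfigLoader().create_model_config_from_name(model_name)
    assert config.model_name == model_name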
Example #7
def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test starting a classification model via the commandline runner. Test if we can provide overrides
    for parameters that live inside the DeepLearningConfig, and ones that are specific to classification models.
    :return:
    """
    set_from_commandline = 12345
    scalar1 = '["label"]'
    model_name = "DummyClassification"
    initial_config = ModelConfigLoader().create_model_config_from_name(
        model_name)
    assert initial_config.non_image_feature_channels == []
    output_root = str(test_output_dirs.root_dir)
    args = [
        "",
        "--model",
        model_name,
        "--train",
        "True",
        "--random_seed",
        str(set_from_commandline),
        "--non_image_feature_channels",
        scalar1,
        "--output_to",
        output_root,
        "--max_num_gpus",
        "1",
        "--recovery_checkpoint_save_interval",
        "2",
        "--recovery_checkpoints_save_last_k",
        "2",
        "--num_epochs",
        "6",
    ]
    with mock.patch("sys.argv", args):
        config, _ = runner.run(
            project_root=fixed_paths.repository_root_directory(),
            yaml_config_file=fixed_paths.SETTINGS_YAML_FILE)
    assert isinstance(config, ScalarModelBase)
    assert config.model_name == "DummyClassification"
    assert config.get_effective_random_seed() == set_from_commandline
    assert config.non_image_feature_channels == ["label"]
    assert str(config.outputs_folder).startswith(output_root)
    assert (config.logs_folder / LOG_FILE_NAME).exists()
    # Check that we saved one checkpoint every second epoch, that we kept only the last 2 of those,
    # and that last.ckpt has been renamed to best.ckpt
    assert len(os.listdir(config.checkpoint_folder)) == 3
    assert (config.checkpoint_folder /
            str(RECOVERY_CHECKPOINT_FILE_NAME + "_epoch=3" +
                CHECKPOINT_SUFFIX)).exists()
    assert (config.checkpoint_folder /
            str(RECOVERY_CHECKPOINT_FILE_NAME + "_epoch=5" +
                CHECKPOINT_SUFFIX)).exists()
    assert (config.checkpoint_folder /
            BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).exists()
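One reading of the three checkpoint assertions, assuming zero-based epoch numbering: with num_epochs=6 and a save interval of 2, recovery checkpoints are written at epochs 1, 3 and 5; keeping only the last 2 leaves epochs 3 and 5, which together with best.ckpt gives the 3 files counted above. The arithmetic as a sketch:

# Hypothetical reconstruction of the checkpoint schedule asserted above.
num_epochs, save_interval, keep_last_k = 6, 2, 2
saved = [e for e in range(num_epochs) if (e + 1) % save_interval == 0]  # [1, 3, 5]
kept = saved[-keep_last_k:]  # [3, 5]
assert kept == [3, 5]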
Example #8
def get_configs(
        default_model_config: SegmentationModelBase, yaml_file_path: Path
) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    """
    Creates the model config, the Azure config, and the raw argument dictionary from the
    commandline and the given YAML file. If default_model_config is given, it takes precedence
    over loading the model config via the --model argument.
    """
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader(
    ).create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
Example #9
def test_load_all_configs(model_name: str) -> None:
    """
    Loads all model configurations that are present in the ML/src/configs folder,
    and carries out basic validations of the configuration.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    config = ModelConfigLoader().create_model_config_from_name(model_name)
    assert config.model_name == model_name, "Mismatch between definition .py file and model name"
    if config.is_segmentation_model:
        # Reduce the feature channels to a minimum, to make tests run fast on CPU.
        minimal_feature_channels = 1
        config.feature_channels = [minimal_feature_channels] * len(config.feature_channels)
        print("Model architecture after restricting to 2 feature channels only:")
        model = create_model_with_temperature_scaling(config)
        generate_and_print_model_summary(config, model)  # type: ignore
    else:
        # For classification models, we can't always print a model summary: The model could require arbitrary
        # numbers of input tensors, and we'd only know once we load the training data.
        # Hence, only try to create the model, but don't attempt to print the summary.
        create_model_with_temperature_scaling(config)
Example #10
def test_config_loader_as_in_registration() -> None:
    """
    During model registration, the model config namespace is read out from the present model. Ensure that we
    can create a config loader that has that value as an input.
    """
    loader1 = ModelConfigLoader()
    model_name = "BasicModel2Epochs"
    model = loader1.create_model_config_from_name(model_name)
    assert model is not None
    namespace = model.__module__
    loader2 = ModelConfigLoader(model_configs_namespace=namespace)
    assert len(loader2.module_search_specs) == 2
    model2 = loader2.create_model_config_from_name(model_name)
    assert model2 is not None
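The assertion len(loader2.module_search_specs) == 2 suggests that a custom namespace is added to, rather than replacing, the default search location, which is why both loaders resolve the same model name. A small sketch of that assumed behaviour; the relationship, not the exact counts, is the point:

# Assumed, not confirmed by the source: a custom namespace contributes one extra search spec.
default_loader = ModelConfigLoader()
custom_loader = ModelConfigLoader(model_configs_namespace="InnerEye.ML.configs.segmentation")
assert len(custom_loader.module_search_specs) == len(default_loader.module_search_specs) + 1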
Example #12
def main() -> None:
    """
    Loads the model predictions and ground truth for the configured run, computes surface
    distances per subject and structure, and writes comparison plots to the output folder.
    """
    parser = create_runner_parser(SegmentationModelBase)
    parser_result = parse_args_and_add_yaml_variables(
        parser, fail_on_unknown_args=True)
    surface_distance_config = SurfaceDistanceConfig.parse_args()

    azure_config = AzureConfig(**parser_result.args)
    config_model = azure_config.model
    if config_model is None:
        raise ValueError(
            "The name of the model to train must be given in the --model argument."
        )

    model_config = ModelConfigLoader().create_model_config_from_name(
        config_model)
    model_config.apply_overrides(parser_result.overrides, should_validate=True)
    execution_mode = surface_distance_config.execution_mode

    run_mode = surface_distance_config.run_mode
    if run_mode == SurfaceDistanceRunType.IOV:
        ct_path = Path(
            "outputs") / SurfaceDistanceRunType.IOV.value.lower() / "ct.nii.gz"
        ct = load_nifti_image(ct_path).image
    else:
        ct = None
    annotators = [
        annotator.strip() for annotator in surface_distance_config.annotators
    ]
    extended_annotators = annotators + [surface_distance_config.model_name]

    outlier_range = surface_distance_config.outlier_range
    predictions = load_predictions(run_mode, azure_config, model_config,
                                   execution_mode, extended_annotators,
                                   outlier_range)
    segmentations = [
        load_nifti_image(Path(pred_seg.segmentation_path))
        for pred_seg in predictions
    ]
    img_shape = segmentations[0].image.shape
    # transpose spacing to match image which is transposed in io_util
    voxel_spacing = segmentations[0].header.spacing[::-1]

    overall_gold_standard = np.zeros(img_shape)
    sds_for_annotator = sd_util.initialise_surface_distance_dictionary(
        extended_annotators, img_shape)

    plane = surface_distance_config.plane
    output_img_dir = Path(surface_distance_config.output_img_dir)

    subject_id: Optional[int] = None
    for prediction, pred_seg_w_header in zip(predictions, segmentations):
        subject_id = prediction.subject_id
        structure_name = prediction.structure_name
        annotator = prediction.annotator
        pred_segmentation = pred_seg_w_header.image
        if run_mode == SurfaceDistanceRunType.OUTLIERS:
            try:
                ground_truth = sd_util.load_ground_truth_from_run(
                    model_config, surface_distance_config, subject_id,
                    structure_name)
            except FileNotFoundError as e:
                logging.warning(e)
                continue
        elif run_mode == SurfaceDistanceRunType.IOV:
            ground_truth = sd_util.get_annotations_and_majority_vote(
                model_config, annotators, structure_name)
        else:
            raise ValueError(
                f'Unrecognised run mode: {run_mode}. Expected either IOV or OUTLIERS'
            )

        binary_prediction_mask = multi_label_array_to_binary(
            pred_segmentation, 2)[1]
        # For comparison, plot gold standard vs predicted segmentation
        segmentation_and_groundtruth_plot(binary_prediction_mask,
                                          ground_truth,
                                          subject_id,
                                          structure_name,
                                          plane,
                                          output_img_dir,
                                          annotator=annotator)

        if run_mode == SurfaceDistanceRunType.IOV:
            overall_gold_standard += ground_truth

        # Calculate and plot surface distance
        sds_full = sd_util.calculate_surface_distances(ground_truth,
                                                       binary_prediction_mask,
                                                       list(voxel_spacing))
        surface_distance_ground_truth_plot(ct,
                                           ground_truth,
                                           sds_full,
                                           subject_id,
                                           structure_name,
                                           plane,
                                           output_img_dir,
                                           annotator=annotator)

        if annotator is not None:
            sds_for_annotator[annotator] += sds_full

    # Plot all structures SDs for each annotator
    if run_mode == SurfaceDistanceRunType.IOV and subject_id is not None:
        for annotator, sds in sds_for_annotator.items():
            num_classes = int(np.amax(np.unique(overall_gold_standard)))
            binarised_gold_standard = multi_label_array_to_binary(
                overall_gold_standard, num_classes)[1:].sum(axis=0)
            surface_distance_ground_truth_plot(ct,
                                               binarised_gold_standard,
                                               sds,
                                               subject_id,
                                               'All',
                                               plane,
                                               output_img_dir,
                                               annotator=annotator)
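main() leans on multi_label_array_to_binary(array, num_classes), taking element [1] for a single foreground mask, and [1:].sum(axis=0) to merge all foreground labels. A hypothetical equivalent for illustration only; the real helper's exact indexing convention is assumed, not confirmed:

import numpy as np

def multi_label_to_binary_sketch(array: np.ndarray, num_classes: int) -> np.ndarray:
    # One boolean mask per label in 0..num_classes-1, stacked along a new leading axis.
    return np.stack([array == label for label in range(num_classes)])

seg = np.array([[0, 1], [1, 1]])
masks = multi_label_to_binary_sketch(seg, num_classes=2)
assert masks[1].tolist() == [[False, True], [True, True]]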
Example #13
def get_model_loader(namespace: Optional[str] = None) -> ModelConfigLoader:
    """
    Returns a ModelConfigLoader for segmentation models, with the given non-default namespace (if not None)
    to search under.
    """
    return ModelConfigLoader(model_configs_namespace=namespace)
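Usage is a one-liner; the custom namespace below is illustrative (Example #2 only confirms that real namespaces start with "InnerEye.ML.configs."):

# Search under the default namespace:
default_loader = get_model_loader()
# Search under a hypothetical custom namespace:
custom_loader = get_model_loader("InnerEye.ML.configs.segmentation")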