def __init__(self, config: DeepLearningConfig, *args: Any,
              **kwargs: Any) -> None:
     super().__init__(*args, **kwargs)
     self.outputs_folder = config.outputs_folder
     self.checkpoint_folder = config.checkpoint_folder
     self.model: DeviceAwareModule = DeviceAwareModule()
     # These two will be set later in set_optimizer_and_scheduler.
     # The ddp_spawn accelerator only works if the model configuration object is
     # not stored in here. Hence, need to do operations that require a full config
     # in a way that does not require storing the config.
     self.optimizer: Optional[Optimizer] = None
     self.l_rate_scheduler: Optional[_LRScheduler] = None
     self.cross_validation_split_index = config.cross_validation_split_index
     self.effective_random_seed = config.get_effective_random_seed()
     # This should be re-assigned on the outside, to a logger that is hooked up with the Trainer object.
     self.storing_logger = StoringLogger()
     # This will be initialized correctly in epoch_start
     self.random_state: Optional[RandomStateSnapshot] = None
     # training loggers
     self.train_metrics_folder = self.outputs_folder / ModelExecutionMode.TRAIN.value
     self.val_metrics_folder = self.outputs_folder / ModelExecutionMode.VAL.value
     fixed_logger_columns = {
         LoggingColumns.CrossValidationSplitIndex.value:
         config.cross_validation_split_index
     }
     self.train_epoch_metrics_logger = DataframeLogger(
         self.train_metrics_folder / EPOCH_METRICS_FILE_NAME,
         fixed_columns=fixed_logger_columns)
     self.val_epoch_metrics_logger = DataframeLogger(
         self.val_metrics_folder / EPOCH_METRICS_FILE_NAME,
         fixed_columns=fixed_logger_columns)
     # Stores information about the checkpoint that created this model, if any.
     self.checkpoint_loading_message = ""
def on_train_start(self) -> None:
     """
     Initializes the per-rank logger objects that write to the file system.
     """
     # These loggers store the per-subject model outputs. They cannot be initialized in the constructor because
     # the trainer object will not yet be set, and we need to get the rank from there.
     fixed_logger_columns = {LoggingColumns.CrossValidationSplitIndex.value: self.cross_validation_split_index}
     subject_output_file = get_subject_output_file_per_rank(self.trainer.global_rank)
     self.train_subject_outputs_logger = DataframeLogger(self.train_metrics_folder / subject_output_file,
                                                         fixed_columns=fixed_logger_columns)
     self.val_subject_outputs_logger = DataframeLogger(self.val_metrics_folder / subject_output_file,
                                                       fixed_columns=fixed_logger_columns)
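
A minimal sketch of the pattern described in the comment above: in PyTorch Lightning (which this snippet appears to use, given on_train_start and self.trainer), the process rank is only known once the Trainer has been attached to the module, so per-rank file paths have to be created in on_train_start rather than in the constructor. The class name and file naming scheme below are made up for illustration and are not part of InnerEye.

from pathlib import Path
from typing import Optional

import pytorch_lightning as pl


class PerRankOutputModule(pl.LightningModule):
    """Hypothetical module that writes one per-subject output file per DDP rank."""

    def __init__(self, output_folder: Path) -> None:
        super().__init__()
        self.output_folder = output_folder
        # Cannot be set here: self.trainer (and hence the rank) is not attached yet.
        self.subject_outputs_file: Optional[Path] = None

    def on_train_start(self) -> None:
        # self.trainer.global_rank is only valid once the Trainer has been attached.
        self.subject_outputs_file = self.output_folder / f"rank_{self.trainer.global_rank}_outputs.csv"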
def test_load_metrics_from_df_with_hues(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if we can re-create a MetricsDict object with model predictions and labels, when the data file contains
    a prediction target value.
    """
    df_str = """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
01,1,2137.00005,0.54349,1.0,0,Val
01,1,2137.00125,0.54324,0.0,1,Val
01,1,3250.00005,0.50822,0.0,0,Val
01,1,3250.12345,0.47584,0.0,1,Val
02,1,2137.00005,0.55538,1.0,0,Val
02,1,2137.00125,0.55759,0.0,1,Val
02,1,3250.00005,0.47255,0.0,0,Val
02,1,3250.12345,0.46996,0.0,1,Val
03,1,2137.00005,0.56670,1.0,0,Val
03,1,2137.00125,0.57003,0.0,1,Val
03,1,3250.00005,0.46321,0.0,0,Val
03,1,3250.12345,0.47309,0.0,1,Val
"""
    df = pd.read_csv(StringIO(df_str),
                     converters={LoggingColumns.Hue.value: lambda x: x})
    metrics = ScalarMetricsDict.load_execution_mode_metrics_from_df(
        df, is_classification_metrics=True)
    mode = ModelExecutionMode.VAL
    epoch = 1
    assert mode in metrics
    assert epoch in metrics[mode]
    metrics_dict = metrics[mode][epoch]
    expected_hues = ["01", "02", "03"]
    assert metrics_dict.get_hue_names(include_default=False) == expected_hues
    for hue in expected_hues:
        assert len(metrics_dict._get_hue(hue).get_predictions()) == 4
    logger_output_file = test_output_dirs.create_file_or_folder_path(
        "output.csv")
    logger = DataframeLogger(csv_path=logger_output_file)
    ScalarMetricsDict.aggregate_and_save_execution_mode_metrics(
        metrics, logger)
    output = pd.read_csv(logger_output_file, dtype=str)
    assert LoggingColumns.Hue.value in output
    assert list(output[LoggingColumns.Hue.value]) == expected_hues
    assert LoggingColumns.DataSplit.value in output
    assert list(output[LoggingColumns.DataSplit.value].unique()) == [
        ModelExecutionMode.VAL.value
    ]
    assert LoggingColumns.Epoch.value in output
    assert list(output[LoggingColumns.Epoch.value].unique()) == ["1"]
    assert LoggingColumns.AreaUnderPRCurve.value in output
    assert list(output[LoggingColumns.AreaUnderPRCurve.value]) == [
        '1.00000', '0.25000', '0.25000'
    ]
def test_dataframe_logger() -> None:
    fixed_columns = {"cross_validation_split_index": 1}
    records = [
        {
            "bar": math.pi,
            MetricType.LEARNING_RATE.value: 1e-5
        },
        {
            "bar": math.pi,
            MetricType.LEARNING_RATE.value: 1
        },
    ]
    out_buffer = StringIO()
    df = DataframeLogger(csv_path=out_buffer, fixed_columns=fixed_columns)
    for r in records:
        df.add_record(r)
    df.flush()
    assert out_buffer.getvalue().splitlines() == [
        'bar,LearningRate,cross_validation_split_index',
        '3.141593,1.000000e-05,1', '3.141593,1.000000e+00,1'
    ]
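
A small sketch of the same DataframeLogger API used with a file path instead of a StringIO buffer, which is how the metrics files in the earlier snippets are produced. It assumes DataframeLogger is imported as in the examples above; the output location below is made up.

from pathlib import Path

metrics_file = Path("outputs") / "example_metrics.csv"  # hypothetical location
metrics_file.parent.mkdir(parents=True, exist_ok=True)
logger = DataframeLogger(csv_path=metrics_file,
                         fixed_columns={"cross_validation_split_index": 0})
logger.add_record({"bar": 1.0, "LearningRate": 1e-4})
# flush() writes the header plus the buffered rows; the fixed column is appended to every row.
logger.flush()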
def classification_model_test(
        config: ScalarModelBase, data_split: ModelExecutionMode,
        checkpoint_paths: List[Path], model_proc: ModelProcessing,
        cross_val_split_index: int) -> InferenceMetricsForClassification:
    """
    The main testing loop for classification models. It runs a loop over all epochs for which testing should be done.
    It loads the model and datasets, then proceeds to test the model for all requested checkpoints.
    :param config: The model configuration.
    :param data_split: The name of the folder to store the results inside each epoch folder in the outputs_dir,
                       used mainly in model evaluation using different dataset splits.
    :param checkpoint_paths: Checkpoint paths used to initialize the model.
    :param model_proc: Whether we are testing an ensemble or a single model.
    :param cross_val_split_index: The cross validation split index to record alongside each per-subject model output.
    :return: InferenceMetricsForClassification object that contains metrics for all of the checkpoint epochs.
    """
    pipeline = create_inference_pipeline(config=config,
                                         checkpoint_paths=checkpoint_paths)
    if pipeline is None:
        raise ValueError("Inference pipeline could not be created.")
    # for mypy
    assert isinstance(pipeline, ScalarInferencePipelineBase)
    ml_util.set_random_seed(config.get_effective_random_seed(),
                            "Model Testing")
    ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
        shuffle=False, batch_size=1, num_dataload_workers=0)
    logging.info(f"Starting to evaluate model on {data_split.value} set.")
    results_folder = config.outputs_folder / get_best_epoch_results_path(
        data_split, model_proc)
    os.makedirs(str(results_folder), exist_ok=True)
    metrics_dict = create_metrics_dict_for_scalar_models(config)
    output_logger: Optional[DataframeLogger] = DataframeLogger(
        csv_path=results_folder / MODEL_OUTPUT_CSV)

    for sample in ds:
        result = pipeline.predict(sample)
        model_output = result.posteriors
        label = result.labels.to(device=model_output.device)
        sample_id = result.subject_ids[0]
        if output_logger:
            for i in range(len(config.target_names)):
                output_logger.add_record({
                    LoggingColumns.Patient.value: sample_id,
                    LoggingColumns.Hue.value: config.target_names[i],
                    LoggingColumns.Label.value: label[0][i].item(),
                    LoggingColumns.ModelOutput.value: model_output[0][i].item(),
                    LoggingColumns.CrossValidationSplitIndex.value: cross_val_split_index
                })

        compute_scalar_metrics(metrics_dict,
                               subject_ids=[sample_id],
                               model_output=model_output,
                               labels=label,
                               loss_type=config.loss_type)
        logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")
    average = metrics_dict.average(across_hues=False)
    logging.info(average.to_string())
    if isinstance(metrics_dict, ScalarMetricsDict):
        csv_file = results_folder / SUBJECT_METRICS_FILE_NAME
        logging.info(
            f"Writing {data_split.value} metrics to file {str(csv_file)}")
        # If we are running inference after a training run, the validation set metrics may have been written
        # during train time. If this is not the case, or we are running on the test set, create the metrics
        # file.
        if not csv_file.exists():
            df_logger = DataframeLogger(csv_file)
            # If this is an ensemble model, use the default cross validation split index;
            # otherwise record which fold produced this prediction.
            cv_index = DEFAULT_CROSS_VALIDATION_SPLIT_INDEX if model_proc == ModelProcessing.ENSEMBLE_CREATION \
                else cross_val_split_index
            metrics_dict.store_metrics_per_subject(
                df_logger=df_logger,
                mode=data_split,
                cross_validation_split_index=cv_index,
                epoch=BEST_EPOCH_FOLDER_NAME)
            # write to disk
            df_logger.flush()

    if output_logger:
        output_logger.flush()

    return InferenceMetricsForClassification(metrics=metrics_dict)
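
For reference, a sketch (not part of the original code) of how the per-subject output CSV written by output_logger above could be read back for further analysis. It assumes LoggingColumns and MODEL_OUTPUT_CSV are importable as in the snippet above, and that results_folder is the folder used there.

from pathlib import Path

import pandas as pd


def mean_output_per_target(results_folder: Path) -> pd.Series:
    # Assumes LoggingColumns and MODEL_OUTPUT_CSV are imported as in the snippet above.
    outputs = pd.read_csv(results_folder / MODEL_OUTPUT_CSV)
    # The file has one row per (subject, prediction target); average the model output for each target.
    return outputs.groupby(LoggingColumns.Hue.value)[LoggingColumns.ModelOutput.value].mean()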
def plot_cross_validation_from_files(
        config_and_files: OfflineCrossvalConfigAndFiles,
        root_folder: Path,
        is_ensemble_run: bool = False) -> None:
    """
    Runs various plots for the results of a cross validation run, and writes them to a given folder.
    :param config_and_files: The setup for plotting results and the set of data files to analyse.
    :param root_folder: The folder into which the results should be written.
    :param is_ensemble_run: If True, assume that this run of cross validation analysis is for an ensemble model
    and assert that there are N+1 data files available. If False, the analysis only concerns the cross
    validation runs, and the number of files is checked to be N.
    """
    config = config_and_files.config
    if config.number_of_cross_validation_splits > 1:
        check_result_file_counts(config_and_files,
                                 is_ensemble_run=is_ensemble_run)
    result_files = config_and_files.files
    metrics_dfs = load_dataframes(result_files, config)
    full_csv_file = root_folder / FULL_METRICS_DATAFRAME_FILE
    initial_metrics = pd.concat(list(metrics_dfs.values()))
    if config.model_category == ModelCategory.Segmentation:
        if config.create_plots:
            plot_metrics(config, metrics_dfs, root_folder)
        save_outliers(config, metrics_dfs, root_folder)
        all_metrics, focus_splits = add_comparison_data(
            config, initial_metrics)
        all_metrics.to_csv(full_csv_file, index=False)
        run_statistical_tests_on_file(root_folder, full_csv_file, config,
                                      focus_splits)
    else:
        # For classification runs, we also want to compute the aggregated training metrics for
        # each fold.
        metrics = ScalarMetricsDict.load_execution_mode_metrics_from_df(
            initial_metrics,
            config.model_category == ModelCategory.Classification)
        ScalarMetricsDict.aggregate_and_save_execution_mode_metrics(
            metrics=metrics,
            data_frame_logger=DataframeLogger(csv_path=root_folder /
                                              METRICS_AGGREGATES_FILE))
        # The full metrics file saves the prediction for each individual subject. Do not include the training
        # results in this file (as in cross-validation a subject is used in several folds.)
        val_and_test_metrics = initial_metrics.loc[initial_metrics[
            LoggingColumns.DataSplit.value] != ModelExecutionMode.TRAIN.value]
        val_and_test_metrics.to_csv(full_csv_file, index=False)

        # Copy one instance of the dataset.CSV files to the root of the results folder. It is possible
        # that the different CV folds run with different dataset files, but not expected for classification
        # models at the moment (could change with ensemble models)
        dataset_csv = None
        for file in result_files:
            if file.dataset_csv_file:
                dataset_csv = file.dataset_csv_file
                break
        if dataset_csv:
            shutil.copy(str(dataset_csv), str(root_folder))
    name_dct = config_and_files.config.short_names
    if name_dct:
        pairs = [(val, key) for key, val in name_dct.items()]
        with Path(root_folder / RUN_DICTIONARY_NAME).open("w") as out:
            max_len = max(len(short_name) for short_name, _ in pairs)
            for short_name, long_name in sorted(pairs):
                out.write(f"{short_name:{max_len}s}    {long_name}\n")
def classification_model_test(config: ScalarModelBase,
                              data_split: ModelExecutionMode,
                              checkpoint_handler: CheckpointHandler,
                              model_proc: ModelProcessing) -> InferenceMetricsForClassification:
    """
    The main testing loop for classification models. It runs a loop over all epochs for which testing should be done.
    It loads the model and datasets, then proceeds to test the model for all requested checkpoints.
    :param config: The model configuration.
    :param data_split: The name of the folder to store the results inside each epoch folder in the outputs_dir,
                       used mainly in model evaluation using different dataset splits.
    :param checkpoint_handler: Checkpoint handler object to find checkpoint paths for model initialization
    :param model_proc: Whether we are testing an ensemble or a single model.
    :return: InferenceMetricsForClassification object that contains metrics for all of the checkpoint epochs.
    """

    def test_epoch(checkpoint_paths: List[Path]) -> Optional[MetricsDict]:
        pipeline = create_inference_pipeline(config=config,
                                             checkpoint_paths=checkpoint_paths)

        if pipeline is None:
            return None

        # for mypy
        assert isinstance(pipeline, ScalarInferencePipelineBase)

        ml_util.set_random_seed(config.get_effective_random_seed(), "Model Testing")
        ds = config.get_torch_dataset_for_inference(data_split).as_data_loader(
            shuffle=False,
            batch_size=1,
            num_dataload_workers=0
        )

        logging.info(f"Starting to evaluate model on {data_split.value} set.")
        metrics_dict = create_metrics_dict_for_scalar_models(config)
        for sample in ds:
            result = pipeline.predict(sample)
            model_output = result.posteriors
            label = result.labels.to(device=model_output.device)
            sample_id = result.subject_ids[0]
            compute_scalar_metrics(metrics_dict,
                                   subject_ids=[sample_id],
                                   model_output=model_output,
                                   labels=label,
                                   loss_type=config.loss_type)
            logging.debug(f"Example {sample_id}: {metrics_dict.to_string()}")

        average = metrics_dict.average(across_hues=False)
        logging.info(average.to_string())

        return metrics_dict

    checkpoints_to_test = checkpoint_handler.get_checkpoints_to_test()

    if not checkpoints_to_test:
        raise ValueError("There were no checkpoints available for model testing.")

    result = test_epoch(checkpoint_paths=checkpoints_to_test)
    if result is None:
        raise ValueError("There was no single checkpoint file available for model testing.")
    else:
        if isinstance(result, ScalarMetricsDict):
            results_folder = config.outputs_folder / get_epoch_results_path(data_split, model_proc)
            csv_file = results_folder / SUBJECT_METRICS_FILE_NAME

            logging.info(f"Writing {data_split.value} metrics to file {str(csv_file)}")

            # If we are running inference after a training run, the validation set metrics may have been written
            # during train time. If this is not the case, or we are running on the test set, create the metrics
            # file.
            if not csv_file.exists():
                os.makedirs(str(results_folder), exist_ok=True)
                df_logger = DataframeLogger(csv_file)

                # cross validation split index not relevant during test time
                result.store_metrics_per_subject(df_logger=df_logger,
                                                 mode=data_split)
                # write to disk
                df_logger.flush()

    return InferenceMetricsForClassification(metrics=result)