Example no. 1
    def generate_report(config: DeepLearningConfig, best_epoch: int, model_proc: ModelProcessing) -> None:
        logging.info("Saving report in html")
        if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
            return

        try:
            def get_epoch_path(mode: ModelExecutionMode) -> Path:
                p = get_epoch_results_path(best_epoch, mode=mode, model_proc=model_proc)
                return config.outputs_folder / p / METRICS_FILE_NAME

            path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
            path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
            path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME \
                if model_proc == ModelProcessing.ENSEMBLE_CREATION else config.outputs_folder
            if config.model_category == ModelCategory.Segmentation:
                generate_segmentation_notebook(result_notebook=output_dir / REPORT_IPYNB,
                                               train_metrics=path_to_best_epoch_train,
                                               val_metrics=path_to_best_epoch_val,
                                               test_metrics=path_to_best_epoch_test)
            else:
                if isinstance(config, ScalarModelBase):
                    generate_classification_notebook(result_notebook=output_dir / REPORT_IPYNB,
                                                     train_metrics=path_to_best_epoch_train,
                                                     val_metrics=path_to_best_epoch_val,
                                                     test_metrics=path_to_best_epoch_test,
                                                     dataset_csv_path=config.local_dataset / DATASET_CSV_FILE_NAME
                                                                        if config.local_dataset else None,
                                                     dataset_subject_column=config.subject_column,
                                                     dataset_file_column=config.image_file_column)
                else:
                    logging.info(f"Cannot create report for config of type {type(config)}.")
        except Exception as ex:
            print_exception(ex, "Failed to generate reporting notebook.")
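For orientation: the get_epoch_path closure above maps each execution mode to that split's metrics file under the outputs folder. Below is a minimal self-contained sketch of the same pattern; the ModelExecutionMode values and the epoch_XXX folder layout are assumptions for illustration, not the repository's actual constants.

from enum import Enum
from pathlib import Path


class ModelExecutionMode(Enum):
    # Re-declared locally so the sketch runs standalone; values are assumed.
    TRAIN = "Train"
    VAL = "Val"
    TEST = "Test"


def metrics_paths(outputs_folder: Path, best_epoch: int) -> dict:
    # Mirror the closure above: one metrics.csv path per execution mode.
    def get_epoch_path(mode: ModelExecutionMode) -> Path:
        return outputs_folder / f"epoch_{best_epoch:03d}" / mode.value / "metrics.csv"

    return {mode: get_epoch_path(mode) for mode in ModelExecutionMode}


print(metrics_paths(Path("outputs"), best_epoch=2))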
Example no. 2
    def generate_report(self, model_proc: ModelProcessing) -> None:
        config = self.model_config
        if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
            logging.info(f"No reporting available for a model with category {config.model_category}")
            return
        logging.info("Saving report in HTML")
        try:
            def get_epoch_path(mode: ModelExecutionMode) -> Path:
                p = get_epoch_results_path(mode=mode, model_proc=model_proc)
                return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

            path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
            path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
            path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME \
                if model_proc == ModelProcessing.ENSEMBLE_CREATION else config.outputs_folder

            reports_dir = output_dir / reports_folder
            if not reports_dir.exists():
                reports_dir.mkdir(exist_ok=False)

            if config.model_category == ModelCategory.Segmentation:
                generate_segmentation_notebook(
                    result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                    train_metrics=path_to_best_epoch_train,
                    val_metrics=path_to_best_epoch_val,
                    test_metrics=path_to_best_epoch_test)
            else:
                if isinstance(config, ScalarModelBase) and not isinstance(config, SequenceModelBase):
                    generate_classification_notebook(
                        result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                        config=config,
                        train_metrics=path_to_best_epoch_train,
                        val_metrics=path_to_best_epoch_val,
                        test_metrics=path_to_best_epoch_test)

                    if len(config.class_names) > 1:
                        generate_classification_multilabel_notebook(
                            result_notebook=reports_dir / get_ipynb_report_name(f"{config.model_category.value}_multilabel"),
                            config=config,
                            train_metrics=path_to_best_epoch_train,
                            val_metrics=path_to_best_epoch_val,
                            test_metrics=path_to_best_epoch_test)
                else:
                    logging.info(f"Cannot create report for config of type {type(config)}.")
        except Exception as ex:
            print_exception(ex, "Failed to generate reporting notebook.")
            raise
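A note on the directory creation above: the exists() check followed by mkdir(exist_ok=False) is not atomic, so a concurrent writer could still trigger FileExistsError. A one-line pathlib sketch that avoids the race (directory names assumed for illustration):

from pathlib import Path

reports_dir = Path("outputs") / "reports"
# Single call replaces the exists()/mkdir() pair; parents=True also
# creates any missing intermediate folders.
reports_dir.mkdir(parents=True, exist_ok=True)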
Example no. 3
def test_generate_classification_report(
        test_output_dirs: OutputFolderForTests) -> None:
    reports_folder = Path(__file__).parent
    test_metrics_file = reports_folder / "test_metrics_classification.csv"
    val_metrics_file = reports_folder / "val_metrics_classification.csv"

    config = ScalarModelBase(label_value_column="label",
                             image_file_column="filePath",
                             subject_column="subject")
    config.local_dataset = test_output_dirs.root_dir / "dataset"
    config.local_dataset.mkdir()
    dataset_csv = config.local_dataset / "dataset.csv"
    image_file_name = "image.npy"
    dataset_csv.write_text("subject,filePath,label\n"
                           f"0,0_{image_file_name},0\n"
                           f"1,1_{image_file_name},0\n"
                           f"2,0_{image_file_name},0\n"
                           f"3,1_{image_file_name},0\n"
                           f"4,0_{image_file_name},0\n"
                           f"5,1_{image_file_name},0\n"
                           f"6,0_{image_file_name},0\n"
                           f"7,1_{image_file_name},0\n"
                           f"8,0_{image_file_name},0\n"
                           f"9,1_{image_file_name},0\n"
                           f"10,0_{image_file_name},0\n"
                           f"11,1_{image_file_name},0\n")

    np.save(str(config.local_dataset / f"0_{image_file_name}"),
            np.random.randint(0, 255, [5, 4]))
    np.save(str(config.local_dataset / f"1_{image_file_name}"),
            np.random.randint(0, 255, [5, 4]))

    result_file = test_output_dirs.root_dir / "report.ipynb"
    result_html = generate_classification_notebook(
        result_notebook=result_file,
        config=config,
        val_metrics=val_metrics_file,
        test_metrics=test_metrics_file)
    assert result_file.is_file()
    assert result_html.is_file()
    assert result_html.suffix == ".html"
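The fixture above hand-writes a twelve-row dataset.csv plus two small image arrays. A standalone sketch of the same setup, runnable outside the test harness; the temporary location is arbitrary and the column names simply copy the test:

import tempfile
from pathlib import Path

import numpy as np

dataset_dir = Path(tempfile.mkdtemp()) / "dataset"
dataset_dir.mkdir()
# Twelve subjects alternating between the two image files, all labelled 0,
# matching the CSV written in the test above.
rows = ["subject,filePath,label"]
for subject in range(12):
    rows.append(f"{subject},{subject % 2}_image.npy,0")
(dataset_dir / "dataset.csv").write_text("\n".join(rows) + "\n")
for prefix in (0, 1):
    np.save(str(dataset_dir / f"{prefix}_image.npy"),
            np.random.randint(0, 255, [5, 4]))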
Example no. 4
def test_generate_classification_report(
        test_output_dirs: OutputFolderForTests) -> None:
    reports_folder = Path(__file__).parent
    test_metrics_file = reports_folder / "test_metrics_classification.csv"
    val_metrics_file = reports_folder / "val_metrics_classification.csv"
    dataset_csv_path = reports_folder / 'dataset.csv'
    dataset_subject_column = "subject"
    dataset_file_column = "filePath"

    current_dir = test_output_dirs.make_sub_dir("test_classification_report")
    result_file = current_dir / "report.ipynb"
    result_html = generate_classification_notebook(
        result_notebook=result_file,
        val_metrics=val_metrics_file,
        test_metrics=test_metrics_file,
        dataset_csv_path=dataset_csv_path,
        dataset_subject_column=dataset_subject_column,
        dataset_file_column=dataset_file_column)
    assert result_file.is_file()
    assert result_html.is_file()
    assert result_html.suffix == ".html"
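Both report tests end with the same three assertions on the notebook and the rendered HTML. A minimal pytest sketch of that contract, using the built-in tmp_path fixture in place of OutputFolderForTests; fake_generate is a hypothetical stub standing in for generate_classification_notebook:

from pathlib import Path


def test_report_outputs(tmp_path: Path) -> None:
    def fake_generate(result_notebook: Path) -> Path:
        # Stub: write placeholder files with the expected names and suffixes.
        result_notebook.write_text("{}")
        html = result_notebook.with_suffix(".html")
        html.write_text("<html></html>")
        return html

    result_file = tmp_path / "report.ipynb"
    result_html = fake_generate(result_file)
    assert result_file.is_file()
    assert result_html.is_file()
    assert result_html.suffix == ".html"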
def test_train_classification_multilabel_model(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of classification models, asserting on the individual results from training and
    testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = DummyMulticlassClassification()
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    model_training_result = model_training.model_train(
        config, checkpoint_handler=checkpoint_handler)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [
        0.699870228767395, 0.6239662170410156, 0.551329493522644,
        0.4825132489204407
    ]
    expected_val_loss = [
        0.6299371719360352, 0.5546272993087769, 0.4843321740627289,
        0.41909298300743103
    ]
    # Ensure that all metrics are computed on both training and validation set
    assert len(
        model_training_result.train_results_per_epoch) == config.num_epochs
    assert len(
        model_training_result.val_results_per_epoch) == config.num_epochs
    assert len(model_training_result.train_results_per_epoch[0]) >= 11
    assert len(model_training_result.val_results_per_epoch[0]) >= 11
    for class_name in config.class_names:
        for metric in [
                MetricType.ACCURACY_AT_THRESHOLD_05,
                MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
                MetricType.AREA_UNDER_PR_CURVE,
                MetricType.AREA_UNDER_ROC_CURVE, MetricType.CROSS_ENTROPY
        ]:
            assert f'{metric.value}/{class_name}' in model_training_result.train_results_per_epoch[
                0], f"{metric.value} not in training"
            assert f'{metric.value}/{class_name}' in model_training_result.val_results_per_epoch[
                0], f"{metric.value} not in validation"
    for metric in [
            MetricType.LOSS, MetricType.SECONDS_PER_EPOCH,
            MetricType.SUBJECT_COUNT
    ]:
        assert metric.value in model_training_result.train_results_per_epoch[
            0], f"{metric.value} not in training"
        assert metric.value in model_training_result.val_results_per_epoch[
            0], f"{metric.value} not in validation"

    actual_train_loss = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = model_training_result.get_metric(
        is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = model_training_result.get_metric(
        is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss,
                                              abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss,
                                            abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates,
                                      rel=1e-5), "Learning rates"
    test_results = model_testing.model_test(
        config,
        ModelExecutionMode.TRAIN,
        checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)

    expected_metrics = {
        MetricType.CROSS_ENTROPY: [1.3996, 5.2966, 1.4020, 0.3553, 0.6908],
        MetricType.ACCURACY_AT_THRESHOLD_05:
        [0.0000, 0.0000, 0.0000, 1.0000, 1.0000]
    }

    for i, class_name in enumerate(config.class_names):
        for metric in expected_metrics.keys():
            assert expected_metrics[metric][i] == pytest.approx(
                test_results.metrics.get_single_metric(metric_name=metric,
                                                       hue=class_name), 1e-4)

    def get_epoch_path(mode: ModelExecutionMode) -> Path:
        p = get_epoch_results_path(mode=mode)
        return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

    path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
    path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
    path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)
    generate_classification_notebook(
        result_notebook=config.outputs_folder /
        get_ipynb_report_name(config.model_category.value),
        config=config,
        train_metrics=path_to_best_epoch_train,
        val_metrics=path_to_best_epoch_val,
        test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder /
            get_html_report_name(config.model_category.value)).exists()

    report_name_multilabel = f"{config.model_category.value}_multilabel"
    generate_classification_multilabel_notebook(
        result_notebook=config.outputs_folder /
        get_ipynb_report_name(report_name_multilabel),
        config=config,
        train_metrics=path_to_best_epoch_train,
        val_metrics=path_to_best_epoch_val,
        test_metrics=path_to_best_epoch_test)
    assert (config.outputs_folder /
            get_html_report_name(report_name_multilabel)).exists()
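The per-class assertions above rely on pytest.approx with a tolerance, since stored floating-point metrics rarely match bit-for-bit. A tiny self-contained sketch of that comparison pattern; the class names and values here are invented for illustration:

import pytest

expected = {"Default": 0.6908, "ClassA": 1.3996}
actual = {"Default": 0.69081, "ClassA": 1.39958}
for class_name, value in expected.items():
    # abs=1e-4 tolerates rounding in the stored expected values.
    assert actual[class_name] == pytest.approx(value, abs=1e-4)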