def generate_report(config: DeepLearningConfig, best_epoch: int, model_proc: ModelProcessing) -> None:
    """
    Write the reporting notebook for a segmentation or classification model.

    For any other model category the function returns without creating a report. Exceptions raised
    while the report is being built are handed to print_exception and are not re-raised.

    :param config: Model configuration. Supplies the model category, the outputs folder and, for
        classification models, the dataset location and the subject/file column names.
    :param best_epoch: Epoch that is passed on to get_epoch_results_path to locate the metrics files.
    :param model_proc: Passed on to get_epoch_results_path. For ModelProcessing.ENSEMBLE_CREATION the
        notebook is written to OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME below the outputs folder,
        otherwise directly to the outputs folder.
    """
    logging.info("Saving report in html")

    supported_categories = (ModelCategory.Segmentation, ModelCategory.Classification)
    if config.model_category not in supported_categories:
        return

    try:
        def metrics_file_for(mode: ModelExecutionMode) -> Path:
            results_dir = get_epoch_results_path(best_epoch, mode=mode, model_proc=model_proc)
            return config.outputs_folder / results_dir / METRICS_FILE_NAME

        # The three metrics files are passed identically to both notebook generators.
        metrics_files = {
            "train_metrics": metrics_file_for(ModelExecutionMode.TRAIN),
            "val_metrics": metrics_file_for(ModelExecutionMode.VAL),
            "test_metrics": metrics_file_for(ModelExecutionMode.TEST),
        }

        if model_proc == ModelProcessing.ENSEMBLE_CREATION:
            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME
        else:
            output_dir = config.outputs_folder

        if config.model_category == ModelCategory.Segmentation:
            generate_segmentation_notebook(result_notebook=output_dir / REPORT_IPYNB,
                                           **metrics_files)
        elif isinstance(config, ScalarModelBase):
            notebook_path = output_dir / REPORT_IPYNB
            if config.local_dataset:
                dataset_csv = config.local_dataset / DATASET_CSV_FILE_NAME
            else:
                dataset_csv = None
            generate_classification_notebook(result_notebook=notebook_path,
                                             dataset_csv_path=dataset_csv,
                                             dataset_subject_column=config.subject_column,
                                             dataset_file_column=config.image_file_column,
                                             **metrics_files)
        else:
            logging.info(f"Cannot create report for config of type {type(config)}.")
    except Exception as ex:
        print_exception(ex, "Failed to generated reporting notebook.")
def generate_report(self, model_proc: ModelProcessing) -> None:
    """
    Write the reporting notebook(s) for the model in self.model_config.

    Reports are only produced for segmentation and classification models; for any other model
    category a message is logged and the method returns. Classification configs get an additional
    multilabel notebook when they have more than one class name. Any exception raised while the
    report is built is passed to print_exception and then re-raised.

    :param model_proc: Passed on to get_epoch_results_path. For ModelProcessing.ENSEMBLE_CREATION the
        reports are written below OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME in the outputs folder,
        otherwise below the outputs folder itself.
    """
    config = self.model_config
    if config.model_category not in [ModelCategory.Segmentation, ModelCategory.Classification]:
        logging.info(f"No reporting available for a model with category {config.model_category}")
        return
    logging.info("Saving report in HTML")
    try:
        def get_epoch_path(mode: ModelExecutionMode) -> Path:
            p = get_epoch_results_path(mode=mode, model_proc=model_proc)
            return config.outputs_folder / p / SUBJECT_METRICS_FILE_NAME

        path_to_best_epoch_train = get_epoch_path(ModelExecutionMode.TRAIN)
        path_to_best_epoch_val = get_epoch_path(ModelExecutionMode.VAL)
        path_to_best_epoch_test = get_epoch_path(ModelExecutionMode.TEST)

        if model_proc == ModelProcessing.ENSEMBLE_CREATION:
            output_dir = config.outputs_folder / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME
        else:
            output_dir = config.outputs_folder

        reports_dir = output_dir / reports_folder
        # Create the folder in a single call: checking exists() first and then calling
        # mkdir(exist_ok=False) fails if the folder appears between the two calls, and a plain
        # mkdir also fails when output_dir itself has not been created yet.
        reports_dir.mkdir(parents=True, exist_ok=True)

        if config.model_category == ModelCategory.Segmentation:
            generate_segmentation_notebook(
                result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                train_metrics=path_to_best_epoch_train,
                val_metrics=path_to_best_epoch_val,
                test_metrics=path_to_best_epoch_test)
        elif isinstance(config, ScalarModelBase) and not isinstance(config, SequenceModelBase):
            generate_classification_notebook(
                result_notebook=reports_dir / get_ipynb_report_name(config.model_category.value),
                config=config,
                train_metrics=path_to_best_epoch_train,
                val_metrics=path_to_best_epoch_val,
                test_metrics=path_to_best_epoch_test)

            # More than one class name: also produce the multilabel report.
            if len(config.class_names) > 1:
                generate_classification_multilabel_notebook(
                    result_notebook=reports_dir / get_ipynb_report_name(
                        f"{config.model_category.value}_multilabel"),
                    config=config,
                    train_metrics=path_to_best_epoch_train,
                    val_metrics=path_to_best_epoch_val,
                    test_metrics=path_to_best_epoch_test)
        else:
            logging.info(f"Cannot create report for config of type {type(config)}.")
    except Exception as ex:
        print_exception(ex, "Failed to generated reporting notebook.")
        raise
def test_generate_classification_report(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that generate_classification_notebook writes the notebook file and returns the path of an
    existing file with an .html suffix, using a small dataset that is created inside the test
    output folder.
    """
    metrics_dir = Path(__file__).parent
    test_metrics_file = metrics_dir / "test_metrics_classification.csv"
    val_metrics_file = metrics_dir / "val_metrics_classification.csv"

    config = ScalarModelBase(label_value_column="label",
                             image_file_column="filePath",
                             subject_column="subject")
    config.local_dataset = test_output_dirs.root_dir / "dataset"
    config.local_dataset.mkdir()

    # Dataset file with 12 subjects that alternate between the two image files created below.
    image_file_name = "image.npy"
    csv_lines = [
        "subject,filePath,label\n",
        f"0,0_{image_file_name},0\n",
        f"1,1_{image_file_name},0\n",
        f"2,0_{image_file_name},0\n",
        f"3,1_{image_file_name},0\n",
        f"4,0_{image_file_name},0\n",
        f"5,1_{image_file_name},0\n",
        f"6,0_{image_file_name},0\n",
        f"7,1_{image_file_name},0\n",
        f"8,0_{image_file_name},0\n",
        f"9,1_{image_file_name},0\n",
        f"10,0_{image_file_name},0\n",
        f"11,1_{image_file_name},0\n",
    ]
    (config.local_dataset / "dataset.csv").write_text("".join(csv_lines))

    for stored_image in (f"0_{image_file_name}", f"1_{image_file_name}"):
        np.save(str(config.local_dataset / stored_image), np.random.randint(0, 255, [5, 4]))

    notebook_file = test_output_dirs.root_dir / "report.ipynb"
    html_file = generate_classification_notebook(result_notebook=notebook_file,
                                                 config=config,
                                                 val_metrics=val_metrics_file,
                                                 test_metrics=test_metrics_file)
    assert notebook_file.is_file()
    assert html_file.is_file()
    assert html_file.suffix == ".html"
def test_generate_classification_report(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that generate_classification_notebook, given metrics files and a dataset file that sit
    next to this test module, writes the notebook file and returns the path of an existing file
    with an .html suffix.
    """
    data_dir = Path(__file__).parent
    notebook_inputs = {
        "val_metrics": data_dir / "val_metrics_classification.csv",
        "test_metrics": data_dir / "test_metrics_classification.csv",
        "dataset_csv_path": data_dir / "dataset.csv",
        "dataset_subject_column": "subject",
        "dataset_file_column": "filePath",
    }

    work_dir = test_output_dirs.make_sub_dir("test_classification_report")
    notebook_file = work_dir / "report.ipynb"
    html_file = generate_classification_notebook(result_notebook=notebook_file, **notebook_inputs)

    assert notebook_file.is_file()
    assert html_file.is_file()
    assert html_file.suffix == ".html"
def test_train_classification_multilabel_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    Train the dummy multi-class classification model, check the per-epoch metrics, losses and
    learning rates against stored values, run inference and compare the per-class metrics, and
    finally check that the classification and multilabel reports are written as HTML files.
    """
    logging_to_stdout(logging.DEBUG)
    config = DummyMulticlassClassification()
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=Path(test_output_dirs.root_dir))
    # Train for 4 epochs
    config.num_epochs = 4
    training_result = model_training.model_train(config, checkpoint_handler=checkpoint_handler)
    assert training_result is not None

    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    expected_train_loss = [0.699870228767395, 0.6239662170410156, 0.551329493522644, 0.4825132489204407]
    expected_val_loss = [0.6299371719360352, 0.5546272993087769, 0.4843321740627289, 0.41909298300743103]

    # Ensure that all metrics are computed on both training and validation set
    assert len(training_result.train_results_per_epoch) == config.num_epochs
    assert len(training_result.val_results_per_epoch) == config.num_epochs
    first_train_epoch = training_result.train_results_per_epoch[0]
    assert len(first_train_epoch) >= 11
    first_val_epoch = training_result.val_results_per_epoch[0]
    assert len(first_val_epoch) >= 11

    # Metrics that are stored once per class, under the key "<metric>/<class name>".
    per_class_metrics = (
        MetricType.ACCURACY_AT_THRESHOLD_05,
        MetricType.ACCURACY_AT_OPTIMAL_THRESHOLD,
        MetricType.AREA_UNDER_PR_CURVE,
        MetricType.AREA_UNDER_ROC_CURVE,
        MetricType.CROSS_ENTROPY,
    )
    for class_name in config.class_names:
        for metric in per_class_metrics:
            assert f'{metric.value}/{class_name}' in first_train_epoch, f"{metric.value} not in training"
            assert f'{metric.value}/{class_name}' in first_val_epoch, f"{metric.value} not in validation"

    # Metrics that are stored once per epoch, under the plain metric name.
    per_epoch_metrics = (MetricType.LOSS, MetricType.SECONDS_PER_EPOCH, MetricType.SUBJECT_COUNT)
    for metric in per_epoch_metrics:
        assert metric.value in first_train_epoch, f"{metric.value} not in training"
        assert metric.value in first_val_epoch, f"{metric.value} not in validation"

    actual_train_loss = training_result.get_metric(is_training=True, metric_type=MetricType.LOSS.value)
    actual_val_loss = training_result.get_metric(is_training=False, metric_type=MetricType.LOSS.value)
    actual_lr = training_result.get_metric(is_training=True, metric_type=MetricType.LEARNING_RATE.value)
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6), "Training loss"
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6), "Validation loss"
    assert actual_lr == pytest.approx(expected_learning_rates, rel=1e-5), "Learning rates"

    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN,
                                            checkpoint_handler=checkpoint_handler)
    assert isinstance(test_results, InferenceMetricsForClassification)

    # One expected value per class, in the order of config.class_names.
    expected_metrics = {
        MetricType.CROSS_ENTROPY: [1.3996, 5.2966, 1.4020, 0.3553, 0.6908],
        MetricType.ACCURACY_AT_THRESHOLD_05: [0.0000, 0.0000, 0.0000, 1.0000, 1.0000]
    }
    for class_index, class_name in enumerate(config.class_names):
        for metric, values_per_class in expected_metrics.items():
            expected_value = values_per_class[class_index]
            actual_value = test_results.metrics.get_single_metric(metric_name=metric, hue=class_name)
            assert expected_value == pytest.approx(actual_value, rel=1e-4)

    def subject_metrics_file(mode: ModelExecutionMode) -> Path:
        results_dir = get_epoch_results_path(mode=mode)
        return config.outputs_folder / results_dir / SUBJECT_METRICS_FILE_NAME

    train_metrics_file = subject_metrics_file(ModelExecutionMode.TRAIN)
    val_metrics_file = subject_metrics_file(ModelExecutionMode.VAL)
    test_metrics_file = subject_metrics_file(ModelExecutionMode.TEST)

    generate_classification_notebook(
        result_notebook=config.outputs_folder / get_ipynb_report_name(config.model_category.value),
        config=config,
        train_metrics=train_metrics_file,
        val_metrics=val_metrics_file,
        test_metrics=test_metrics_file)
    assert (config.outputs_folder / get_html_report_name(config.model_category.value)).exists()

    report_name_multilabel = f"{config.model_category.value}_multilabel"
    generate_classification_multilabel_notebook(
        result_notebook=config.outputs_folder / get_ipynb_report_name(report_name_multilabel),
        config=config,
        train_metrics=train_metrics_file,
        val_metrics=val_metrics_file,
        test_metrics=test_metrics_file)
    assert (config.outputs_folder / get_html_report_name(report_name_multilabel)).exists()