def make_html_map(state: MLApplicationState, base_path: Path) -> dict:
    return {
        "css_style": Util.get_css_content(MLApplicationHTMLBuilder.CSS_PATH),
        "hp_setting": state.hp_setting.get_key(),
        "immuneML_version": MLUtil.get_immuneML_version(),
        "label": state.label_config.get_labels_by_name()[0],
        "dataset_name": state.dataset.name,
        "dataset_type": StringHelper.camel_case_to_word_string(type(state.dataset).__name__),
        "example_count": state.dataset.get_example_count(),
        "dataset_size": f"{state.dataset.get_example_count()} "
                        f"{type(state.dataset).__name__.replace('Dataset', 's').lower()}",
        "labels": [{"name": label_name,
                    "values": str(state.label_config.get_label_values(label_name))[1:-1]}
                   for label_name in state.label_config.get_labels_by_name()],
        "predictions": Util.get_table_string_from_csv(state.predictions_path),
        "predictions_download_link": os.path.relpath(state.predictions_path, base_path)
    }
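# Usage sketch (illustrative only): the map built above is typically passed to
# TemplateParser as the template_map when rendering the page. The template filename
# and result filename below are assumptions, not taken from this module.
def _example_render_ml_application_page(state: MLApplicationState, base_path: Path) -> Path:
    result_path = base_path / "index.html"  # hypothetical output location
    TemplateParser.parse(template_path=EnvironmentSettings.html_templates_path / "MLModelApplication.html",  # hypothetical template
                         template_map=make_html_map(state, base_path),
                         result_path=result_path)
    return result_path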
def _make_selection_reports_for_item_list(hp_items: list, base_path) -> list:
    result = []

    for split_index, hp_item in enumerate(hp_items):
        result.append({
            "split_index": split_index + 1,
            "has_encoding_train_reports": len(hp_item.encoding_train_results) > 0,
            "has_encoding_test_reports": len(hp_item.encoding_test_results) > 0,
            "has_ml_reports": len(hp_item.model_report_results) > 0,
            "encoding_train_reports": Util.to_dict_recursive(hp_item.encoding_train_results, base_path)
            if len(hp_item.encoding_train_results) > 0 else None,
            "encoding_test_reports": Util.to_dict_recursive(hp_item.encoding_test_results, base_path)
            if len(hp_item.encoding_test_results) > 0 else None,
            "ml_reports": Util.to_dict_recursive(hp_item.model_report_results, base_path)
            if len(hp_item.model_report_results) > 0 else None,
        })

    return result if len(result) > 0 else None
def make_html_map(state: DatasetExportState, base_path: Path) -> dict:
    html_map = {
        "css_style": Util.get_css_content(DatasetExportHTMLBuilder.CSS_PATH),
        "name": state.name,
        "immuneML_version": MLUtil.get_immuneML_version(),
        "full_specs": Util.get_full_specs_path(base_path),
        "datasets": [{
            "dataset_name": dataset.name,
            "dataset_type": StringHelper.camel_case_to_word_string(type(dataset).__name__),
            "dataset_size": f"{dataset.get_example_count()} {type(dataset).__name__.replace('Dataset', 's').lower()}",
            "labels": [{"label_name": label} for label in dataset.get_label_names()],
            "preprocessing_sequence": [{
                "preprocessing_name": preprocessing.__class__.__name__,
                "preprocessing_params": ", ".join(f"{key}: {value}" for key, value in vars(preprocessing).items())
            } for preprocessing in state.preprocessing_sequence] if state.preprocessing_sequence is not None else [],
            "show_preprocessing": state.preprocessing_sequence is not None and len(state.preprocessing_sequence) > 0,
            "formats": [{
                "format_name": format_name,
                "dataset_download_link": os.path.relpath(path=Util.make_downloadable_zip(state.result_path,
                                                                                         state.paths[dataset.name][format_name]),
                                                         start=base_path)
            } for format_name in state.formats]
        } for dataset in state.datasets]
    }

    return html_map
def make_html_map(state: ExploratoryAnalysisState, base_path: Path) -> dict:
    html_map = {
        "css_style": Util.get_css_content(ExploratoryAnalysisHTMLBuilder.CSS_PATH),
        "full_specs": Util.get_full_specs_path(base_path),
        "immuneML_version": MLUtil.get_immuneML_version(),
        "analyses": [{
            "name": name,
            "dataset_name": analysis.dataset.name if analysis.dataset.name is not None else analysis.dataset.identifier,
            "dataset_type": StringHelper.camel_case_to_word_string(type(analysis.dataset).__name__),
            "example_count": analysis.dataset.get_example_count(),
            "dataset_size": f"{analysis.dataset.get_example_count()} "
                            f"{type(analysis.dataset).__name__.replace('Dataset', 's').lower()}",
            "show_labels": analysis.label_config is not None and len(analysis.label_config.get_labels_by_name()) > 0,
            "labels": [{"name": label.name, "values": str(label.values)[1:-1]}
                       for label in analysis.label_config.get_label_objects()] if analysis.label_config else None,
            "encoding_key": analysis.encoder.name if analysis.encoder is not None else None,
            "encoding_name": StringHelper.camel_case_to_word_string(type(analysis.encoder).__name__)
            if analysis.encoder is not None else None,
            "encoding_params": [{"param_name": key, "param_value": value}
                                for key, value in vars(analysis.encoder).items()]
            if analysis.encoder is not None else None,
            "show_encoding": analysis.encoder is not None,
            "report": Util.to_dict_recursive(analysis.report_result, base_path)
        } for name, analysis in state.exploratory_analysis_units.items()]
    }

    for analysis in html_map["analyses"]:
        analysis["show_tables"] = len(analysis["report"]["output_tables"]) > 0 if "output_tables" in analysis["report"] else False
        analysis["show_text"] = len(analysis["report"]["output_text"]) > 0 if "output_text" in analysis["report"] else False

    return html_map
def _make_selection(state: TrainMLModelState, assessment_index: int, label: str, base_path):
    selection_state = state.assessment_states[assessment_index].label_states[label].selection_state

    hp_settings = []
    optimal = selection_state.optimal_hp_setting.get_key()

    for hp_setting, hp_items in selection_state.hp_items.items():
        hp_splits = []
        for hp_item in hp_items:
            hp_splits.append(HPHTMLBuilder._print_metric(hp_item.performance, state.optimization_metric))
        hp_settings.append({"hp_setting": hp_setting, "hp_splits": hp_splits, "optimal": hp_setting == optimal})

        performances = [HPHTMLBuilder._print_metric(hp_item.performance, state.optimization_metric) for hp_item in hp_items]
        if len(performances) > 1:
            # average only over numeric values; the original filter `if [isinstance(perf, float)]`
            # was always truthy because a non-empty list is truthy
            hp_settings[-1]["average"] = round(statistics.mean(perf for perf in performances if isinstance(perf, float)),
                                               HPHTMLBuilder.NUM_DIGITS)
            hp_settings[-1]["show_average"] = True
        else:
            hp_settings[-1]["average"] = None
            hp_settings[-1]["show_average"] = False

    has_other_metrics = len([metric for metric in state.metrics if metric != state.optimization_metric]) > 0 and \
        not (state.selection.split_strategy == SplitType.RANDOM and state.selection.training_percentage == 1)

    return {
        "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
        "label": label,
        "assessment_split": assessment_index + 1,
        "splits": [{"split_index": i} for i in range(1, state.selection.split_count + 1)],
        "split_count": state.selection.split_count,
        "optimization_metric": state.optimization_metric.name.lower(),
        "has_other_metrics": has_other_metrics,
        "metrics": [{"performance": HPHTMLBuilder._extract_selection_performance_per_metric(selection_state, metric,
                                                                                            state.selection.split_count),
                     "metric": HPHTMLBuilder._get_heading_metric_name(metric.name.lower())}
                    for metric in state.metrics if metric != state.optimization_metric] if has_other_metrics else None,
        "hp_settings": hp_settings,
        "show_average": any(hps["show_average"] for hps in hp_settings),
        # each guard checks the length of the list it indexes; the test entry originally
        # checked train_data_reports before indexing val_data_reports
        "data_split_reports": [{"split_index": index + 1,
                                "train": Util.to_dict_recursive(selection_state.train_data_reports[index], base_path)
                                if len(selection_state.train_data_reports) == state.selection.split_count else None,
                                "test": Util.to_dict_recursive(selection_state.val_data_reports[index], base_path)
                                if len(selection_state.val_data_reports) == state.selection.split_count else None}
                               for index in range(state.selection.split_count)]
        if len(state.selection.reports.data_split_reports) > 0 else None,
        "has_data_split_reports": len(state.selection.reports.data_split_reports) > 0,
        "has_reports_per_setting": len(state.selection.reports.encoding_reports) + len(state.selection.reports.model_reports) > 0,
        "reports_per_setting": [{"hp_setting": hp_setting,
                                 "reports": HPHTMLBuilder._make_selection_reports_for_item_list(hp_items, base_path)}
                                for hp_setting, hp_items in selection_state.hp_items.items()]
    }
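# Standalone illustration (hypothetical values) of the average computed above:
# _print_metric may return non-float placeholders (e.g. when a metric could not be
# computed), so only float entries contribute to the mean.
def _example_average_performance():
    import statistics
    performances = [0.82, 0.79, "n/a"]
    if len(performances) > 1:
        return round(statistics.mean(perf for perf in performances if isinstance(perf, float)), 3)
    return None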
def _make_html_map(report_results: dict, result_path: Path, instruction_result_paths: dict) -> dict:
    reports = Util.to_dict_recursive(report_results.values(), result_path)

    html_map = {
        "css_style": Util.get_css_content(MultiDatasetBenchmarkHTMLBuilder.CSS_PATH),
        "reports": reports,
        "immuneML_version": MLUtil.get_immuneML_version(),
        "show_reports": len(reports) > 0,
        "instruction_overviews": [{"name": name, "path": Path(os.path.relpath(path / "index.html", result_path))}
                                  for name, path in instruction_result_paths.items()]
    }

    return html_map
def _make_main_html_map(state: TrainMLModelState, base_path: Path) -> dict:
    html_map = {
        "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
        "full_specs": Util.get_full_specs_path(base_path),
        "dataset_name": state.dataset.name if state.dataset.name is not None else state.dataset.identifier,
        "dataset_type": StringHelper.camel_case_to_word_string(type(state.dataset).__name__),
        "example_count": state.dataset.get_example_count(),
        "dataset_size": f"{state.dataset.get_example_count()} "
                        f"{type(state.dataset).__name__.replace('Dataset', 's').lower()}",
        "labels": [{"name": label.name, "values": str(label.values)[1:-1]}
                   for label in state.label_configuration.get_label_objects()],
        "optimization_metric": state.optimization_metric.name.lower(),
        "other_metrics": str([metric.name.lower() for metric in state.metrics])[1:-1].replace("'", ""),
        "metrics": [{"name": metric.name.lower()} for metric in state.metrics],
        "assessment_desc": state.assessment,
        "selection_desc": state.selection,
        "show_hp_reports": bool(state.report_results),
        "hp_reports": Util.to_dict_recursive(state.report_results, base_path) if state.report_results else None,
        "hp_per_label": HPHTMLBuilder._make_hp_per_label(state),
        "models_per_label": HPHTMLBuilder._make_model_per_label(state, base_path),
        "immuneML_version": MLUtil.get_immuneML_version()
    }

    return html_map
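# Note on "other_metrics" above: str(list)[1:-1].replace("'", "") strips the brackets
# and quotes from the list repr to get a comma-separated string. An equivalent, more
# direct form (illustrative helper, not part of the builder):
def _example_format_other_metrics(metrics) -> str:
    return ", ".join(metric.name.lower() for metric in metrics)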
def _make_assessment_pages(state: TrainMLModelState, base_path: Path, label: str):
    assessment_list = []

    for i, assessment_state in enumerate(state.assessment_states):
        assessment_item = {
            "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
            "optimization_metric": state.optimization_metric.name.lower(),
            "split_index": assessment_state.split_index + 1,
            "hp_settings": [],
            "has_reports": len(state.assessment.reports.model_reports) + len(state.assessment.reports.encoding_reports) > 0,
            "train_data_reports": Util.to_dict_recursive(assessment_state.train_val_data_reports, base_path),
            "test_data_reports": Util.to_dict_recursive(assessment_state.test_data_reports, base_path),
            "show_data_reports": len(assessment_state.train_val_data_reports) > 0 or len(assessment_state.test_data_reports) > 0
        }

        if hasattr(assessment_state.train_val_dataset, "metadata_file") and assessment_state.train_val_dataset.metadata_file is not None:
            assessment_item["train_metadata_path"] = os.path.relpath(str(assessment_state.train_val_dataset.metadata_file),
                                                                     str(base_path))
            assessment_item["train_metadata"] = Util.get_table_string_from_csv(assessment_state.train_val_dataset.metadata_file)
        else:
            assessment_item["train_metadata_path"] = None

        if hasattr(assessment_state.test_dataset, "metadata_file") and assessment_state.test_dataset.metadata_file is not None:
            assessment_item["test_metadata_path"] = os.path.relpath(assessment_state.test_dataset.metadata_file, base_path)
            assessment_item["test_metadata"] = Util.get_table_string_from_csv(assessment_state.test_dataset.metadata_file)
        else:
            assessment_item["test_metadata_path"] = None

        assessment_item["label"] = label

        for hp_setting, item in assessment_state.label_states[label].assessment_items.items():
            optimal = str(assessment_state.label_states[label].optimal_hp_setting.get_key())
            reports_path = HPHTMLBuilder._make_assessment_reports(state, i, hp_setting, assessment_state, label, base_path)
            assessment_item["hp_settings"].append({
                "optimal": str(hp_setting) == optimal,
                "hp_setting": str(hp_setting),
                "optimization_metric_val": HPHTMLBuilder._print_metric(item.performance, state.optimization_metric),
                "reports_path": reports_path
            })

        assessment_item["show_non_optimal"] = len(assessment_item["hp_settings"]) > 1
        assessment_item["selection_path"] = HPHTMLBuilder._make_selection_split_path(i, label, state.name)
        assessment_item["performances_per_metric"] = HPHTMLBuilder._extract_assessment_performances_per_metric(state, assessment_state,
                                                                                                               label)
        assessment_list.append(assessment_item)

    return assessment_list
def _extract_selection_performance_per_metric(selection_state: HPSelectionState, metric: Metric, split_count):
    performance = {"setting": [], **{f"split {i + 1}": [] for i in range(split_count)}}

    for hp_setting, hp_item_list in selection_state.hp_items.items():
        performance["setting"].append(str(hp_setting))
        for index, hp_item in enumerate(hp_item_list):
            performance[f"split {index + 1}"].append(HPHTMLBuilder._print_metric(hp_item.performance, metric))

    s = io.StringIO()
    pd.DataFrame(performance).rename(columns={"setting": "Hyperparameter settings (preprocessing, encoding, ML method)"}) \
        .to_csv(s, sep="\t", index=False)

    return Util.get_table_string_from_csv_string(s.getvalue(), separator="\t")
def _extract_assessment_performances_per_metric(state: TrainMLModelState, assessment_state: HPAssessmentState, label: str) -> str:
    performance_metric = {"setting": [], **{metric.name.lower(): [] for metric in state.metrics}}

    for hp_setting, hp_item in assessment_state.label_states[label].assessment_items.items():
        performance_metric["setting"].append(str(hp_setting))
        for metric in sorted(state.metrics, key=lambda metric: metric.name.lower()):
            performance_metric[metric.name.lower()].append(HPHTMLBuilder._print_metric(hp_item.performance, metric))

    s = io.StringIO()
    pd.DataFrame(performance_metric).rename(columns={"setting": "Hyperparameter settings (preprocessing, encoding, ML method)"}) \
        .to_csv(s, sep="\t", index=False)

    return Util.get_table_string_from_csv_string(s.getvalue(), separator="\t")
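# The two _extract_* helpers above share the same DataFrame -> TSV-string round trip.
# A minimal standalone sketch of that pattern (column names and values are hypothetical):
def _example_performance_table_string() -> str:
    import io
    import pandas as pd
    s = io.StringIO()
    pd.DataFrame({"setting": ["enc1_ml1", "enc2_ml1"], "split 1": [0.91, 0.88]}).to_csv(s, sep="\t", index=False)
    return s.getvalue()  # this TSV string is what Util.get_table_string_from_csv_string receives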
def _make_assessment_reports(state, i, hp_setting_key, assessment_state, label, base_path: Path):
    path = base_path / f"{state.name}_{label}_{hp_setting_key}_assessment_reports_split_{i + 1}.html"
    hp_item = assessment_state.label_states[label].assessment_items[hp_setting_key]

    data = {
        "split_index": i + 1,
        "hp_setting": hp_setting_key,
        "label": label,
        "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
        "has_encoding_reports": len(hp_item.encoding_train_results) > 0 or len(hp_item.encoding_test_results) > 0,
        "has_ml_reports": len(hp_item.model_report_results) > 0,
        "encoding_train_reports": Util.to_dict_recursive(hp_item.encoding_train_results, base_path)
        if len(hp_item.encoding_train_results) > 0 else None,
        "encoding_test_reports": Util.to_dict_recursive(hp_item.encoding_test_results, base_path)
        if len(hp_item.encoding_test_results) > 0 else None,
        "ml_reports": Util.to_dict_recursive(hp_item.model_report_results, base_path)
        if len(hp_item.model_report_results) > 0 else None
    }

    if data["has_ml_reports"] or data["has_encoding_reports"]:
        TemplateParser.parse(template_path=EnvironmentSettings.html_templates_path / "Reports.html",
                             template_map=data, result_path=path)
        return path.name
    else:
        return None
def make_html_map(state: SimulationState, base_path: Path) -> dict:
    html_map = {
        "css_style": Util.get_css_content(SimulationHTMLBuilder.CSS_PATH),
        "name": state.name,
        "immuneML_version": MLUtil.get_immuneML_version(),
        "full_specs": Util.get_full_specs_path(base_path),
        "dataset_name": state.resulting_dataset.name if state.resulting_dataset.name is not None else state.resulting_dataset.identifier,
        "dataset_type": StringHelper.camel_case_to_word_string(type(state.resulting_dataset).__name__),
        "example_count": state.resulting_dataset.get_example_count(),
        "dataset_size": f"{state.resulting_dataset.get_example_count()} "
                        f"{type(state.resulting_dataset).__name__.replace('Dataset', 's').lower()}",
        "labels": [{"label_name": label} for label in state.resulting_dataset.get_label_names()],
        "formats": [{
            "format_name": format_name,
            "dataset_download_link": os.path.relpath(path=Util.make_downloadable_zip(state.result_path,
                                                                                     state.paths[state.resulting_dataset.name][format_name]),
                                                     start=base_path)
        } for format_name in state.formats],
        "implantings": [Util.to_dict_recursive(implanting, base_path) for implanting in state.simulation.implantings]
    }

    return html_map
def _make_document(presentations: List[InstructionPresentation], path: Path) -> Path:
    result_path = path / "index.html"

    if len(presentations) > 1:
        html_map = {"instructions": presentations,
                    "css_path": EnvironmentSettings.html_templates_path / "css/custom.css",
                    "full_specs": Util.get_full_specs_path(path),
                    "immuneML_version": MLUtil.get_immuneML_version()}
        TemplateParser.parse(template_path=EnvironmentSettings.html_templates_path / "index.html",
                             template_map=html_map, result_path=result_path)
    elif len(presentations) == 1:
        shutil.copyfile(str(presentations[0].path), str(result_path))
        HTMLBuilder._update_paths(result_path)
    else:
        result_path = None

    return result_path
def make_html_map(state: SubsamplingState, base_path: Path) -> dict:
    html_map = {
        "css_style": Util.get_css_content(SubsamplingHTMLBuilder.CSS_PATH),
        "name": state.name,
        "immuneML_version": MLUtil.get_immuneML_version(),
        "full_specs": Util.get_full_specs_path(base_path),
        "dataset_name": state.dataset.name if state.dataset.name is not None else state.dataset.identifier,
        "labels": [{"label_name": label} for label in state.dataset.get_label_names()],
        "dataset_type": StringHelper.camel_case_to_word_string(type(state.dataset).__name__),
        "example_count": state.dataset.get_example_count(),
        "subsampled_datasets": [{
            "sub_dataset_iter": i,
            "sub_dataset_name": dataset.name,
            "dataset_size": f"{dataset.get_example_count()} {type(dataset).__name__.replace('Dataset', 's').lower()}",
            "formats": [{"dataset_download_link": item, "format_name": key}
                        for key, item in state.subsampled_dataset_paths[dataset.name].items()]
        } for i, dataset in enumerate(state.subsampled_datasets, 1)]
    }

    return html_map
def _move_reports_recursive(obj, path: Path):
    for attribute in (vars(obj) if not isinstance(obj, dict) else obj):
        attribute_value = getattr(obj, attribute) if not isinstance(obj, dict) else obj[attribute]

        if isinstance(attribute_value, list) and all(isinstance(item, ReportResult) for item in attribute_value):
            new_attribute_values = [Util.update_report_paths(report_result, path) for report_result in attribute_value]
            # assign through the matching access style; a plain setattr would fail when obj is a dict
            if isinstance(obj, dict):
                obj[attribute] = new_attribute_values
            else:
                setattr(obj, attribute, new_attribute_values)
        elif isinstance(attribute_value, list) and all(isinstance(item, HPAssessmentState) for item in attribute_value):
            obj = HPHTMLBuilder._process_list_recursively(obj, attribute, attribute_value, path)
        elif isinstance(attribute_value, dict) and all(isinstance(item, (HPLabelState, HPItem)) for item in attribute_value.values()):
            obj = HPHTMLBuilder._process_dict_recursive(obj, attribute, attribute_value, path)
        elif isinstance(attribute_value, dict) and all(isinstance(item, list) for item in attribute_value.values()) \
                and all(all(isinstance(item, HPItem) for item in item_list) for item_list in attribute_value.values()):
            obj = HPHTMLBuilder._process_hp_items(obj, attribute, attribute_value, path)
        elif isinstance(attribute_value, HPSelectionState):
            updated_value = HPHTMLBuilder._move_reports_recursive(attribute_value, path)
            if isinstance(obj, dict):
                obj[attribute] = updated_value
            else:
                setattr(obj, attribute, updated_value)

    return obj
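# Generic sketch of the dict-or-object access pattern used in the traversal above
# (hypothetical helpers, shown for clarity only):
def _example_get(container, key):
    return container[key] if isinstance(container, dict) else getattr(container, key)

def _example_set(container, key, value):
    if isinstance(container, dict):
        container[key] = value
    else:
        setattr(container, key, value)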