Пример #1
0
 def make_html_map(state: MLApplicationState, base_path: Path) -> dict:
     """Collect the template variables for the ML application result page.

     Args:
         state: instruction state; its ``hp_setting``, ``label_config``,
             ``dataset`` and ``predictions_path`` are read here.
         base_path: directory the predictions download link is made relative to.

     Returns:
         A dict with the CSS content, a dataset summary, the labels with their
         values and the predictions table together with its download link.
     """
     dataset = state.dataset
     label_config = state.label_config
     dataset_class_name = type(dataset).__name__

     html_map = {}
     html_map["css_style"] = Util.get_css_content(MLApplicationHTMLBuilder.CSS_PATH)
     html_map["hp_setting"] = state.hp_setting.get_key()
     html_map['immuneML_version'] = MLUtil.get_immuneML_version()
     # Only the first label name is exposed under the "label" key.
     html_map["label"] = label_config.get_labels_by_name()[0]
     html_map["dataset_name"] = dataset.name
     html_map["dataset_type"] = StringHelper.camel_case_to_word_string(dataset_class_name)
     html_map["example_count"] = dataset.get_example_count()
     # The class name is turned into a lower-case plural: "Dataset" becomes "s".
     html_map["dataset_size"] = f"{dataset.get_example_count()} {dataset_class_name.replace('Dataset', 's').lower()}"

     label_rows = []
     for label_name in label_config.get_labels_by_name():
         values_text = str(label_config.get_label_values(label_name))
         # Drop the first and last character of the string form
         # (presumably the enclosing brackets of a list -- confirm).
         label_rows.append({"name": label_name, "values": values_text[1:-1]})
     html_map["labels"] = label_rows

     html_map["predictions"] = Util.get_table_string_from_csv(state.predictions_path)
     html_map["predictions_download_link"] = os.path.relpath(state.predictions_path, base_path)

     return html_map
Пример #2
0
    def _make_selection_reports_for_item_list(hp_items: list,
                                              base_path) -> list:
        """Describe the reports available for each selection split of one setting.

        Args:
            hp_items: items whose ``encoding_train_results``,
                ``encoding_test_results`` and ``model_report_results`` are read;
                their position in the list gives the (one-based) split index.
            base_path: forwarded to ``Util.to_dict_recursive``.

        Returns:
            One dict per item, or ``None`` when ``hp_items`` is empty.
        """

        def _converted_or_none(report_results):
            # Conversion is only attempted when there is at least one result.
            if len(report_results) > 0:
                return Util.to_dict_recursive(report_results, base_path)
            return None

        entries = []

        for split_number, hp_item in enumerate(hp_items, start=1):
            train_results = hp_item.encoding_train_results
            test_results = hp_item.encoding_test_results
            model_results = hp_item.model_report_results

            entry = {"split_index": split_number}
            entry["has_encoding_train_reports"] = len(train_results) > 0
            entry["has_encoding_test_reports"] = len(test_results) > 0
            entry["has_ml_reports"] = len(model_results) > 0
            entry["encoding_train_reports"] = _converted_or_none(train_results)
            entry["encoding_test_reports"] = _converted_or_none(test_results)
            entry["ml_reports"] = _converted_or_none(model_results)
            entries.append(entry)

        # An empty list is reported as None rather than [].
        return entries if entries else None
Пример #3
0
    def make_html_map(state: DatasetExportState, base_path: Path) -> dict:
        """Collect the template variables for the dataset export result page.

        Args:
            state: export state; its ``name``, ``datasets``, ``formats``,
                ``paths``, ``result_path`` and ``preprocessing_sequence`` are read.
            base_path: directory the download links are made relative to; also
                passed to ``Util.get_full_specs_path``.

        Returns:
            A dict with general information and one entry per exported dataset
            (summary, labels, preprocessing steps and per-format download links).
        """
        html_map = {
            "css_style": Util.get_css_content(DatasetExportHTMLBuilder.CSS_PATH),
            "name": state.name,
            'immuneML_version': MLUtil.get_immuneML_version(),
            "full_specs": Util.get_full_specs_path(base_path),
            "datasets": [],
        }

        for dataset in state.datasets:
            class_name = type(dataset).__name__
            sequence = state.preprocessing_sequence

            entry = {
                "dataset_name": dataset.name,
                "dataset_type": StringHelper.camel_case_to_word_string(class_name),
                "dataset_size": f"{dataset.get_example_count()} {class_name.replace('Dataset', 's').lower()}",
                "labels": [{"label_name": label} for label in dataset.get_label_names()],
            }

            # A missing preprocessing sequence is shown as an empty list of steps.
            steps = []
            if sequence is not None:
                for preprocessing in sequence:
                    steps.append({
                        "preprocessing_name": preprocessing.__class__.__name__,
                        # All instance attributes are listed as "key: value" pairs.
                        "preprocessing_params": ", ".join([f"{key}: {value}" for key, value in vars(preprocessing).items()]),
                    })
            entry["preprocessing_sequence"] = steps
            entry["show_preprocessing"] = sequence is not None and len(sequence) > 0

            download_links = []
            for format_name in state.formats:
                zip_path = Util.make_downloadable_zip(state.result_path, state.paths[dataset.name][format_name])
                download_links.append({
                    "format_name": format_name,
                    "dataset_download_link": os.path.relpath(path=zip_path, start=base_path),
                })
            entry["formats"] = download_links

            html_map["datasets"].append(entry)

        return html_map
Пример #4
0
    def make_html_map(state: DatasetExportState, base_path: Path) -> dict:
        """Collect the template variables for the dataset export result page.

        Args:
            state: export state; its ``name``, ``datasets``, ``formats``,
                ``paths`` and ``result_path`` are read.
            base_path: directory the download links are made relative to; also
                passed to ``Util.get_full_specs_path``.

        Returns:
            A dict with general information and one entry per exported dataset
            (summary, labels and per-format download links).
        """

        def _download_entry(dataset, format_name) -> dict:
            # Zip the exported files for this dataset/format, then link to the
            # archive relative to base_path.
            zip_path = Util.make_downloadable_zip(state.result_path, state.paths[dataset.name][format_name])
            return {
                "format_name": format_name,
                "dataset_download_link": os.path.relpath(path=zip_path, start=base_path),
            }

        css_style = Util.get_css_content(DatasetExportHTMLBuilder.CSS_PATH)
        version = MLUtil.get_immuneML_version()
        full_specs = Util.get_full_specs_path(base_path)

        dataset_entries = []
        for dataset in state.datasets:
            class_name = type(dataset).__name__
            dataset_entries.append({
                "dataset_name": dataset.name,
                "dataset_type": StringHelper.camel_case_to_word_string(class_name),
                "dataset_size": f"{dataset.get_example_count()} {class_name.replace('Dataset', 's').lower()}",
                "labels": [{"label_name": label} for label in dataset.get_label_names()],
                "formats": [_download_entry(dataset, format_name) for format_name in state.formats],
            })

        return {
            "css_style": css_style,
            "name": state.name,
            'immuneML_version': version,
            "full_specs": full_specs,
            "datasets": dataset_entries,
        }
Пример #5
0
    def make_html_map(state: ExploratoryAnalysisState,
                      base_path: Path) -> dict:
        """Collect the template variables for the exploratory analysis page.

        Args:
            state: analysis state; ``exploratory_analysis_units`` is iterated as
                a mapping from analysis name to analysis unit.
            base_path: passed to ``Util.get_full_specs_path`` and
                ``Util.to_dict_recursive``.

        Returns:
            A dict with general information and one entry per analysis unit
            (dataset summary, labels, encoding details, report and display flags).
        """

        def _has_content(report, key) -> bool:
            # True only when the key is present and its value is non-empty.
            return key in report and len(report[key]) > 0

        analyses = []
        html_map = {
            "css_style": Util.get_css_content(ExploratoryAnalysisHTMLBuilder.CSS_PATH),
            "full_specs": Util.get_full_specs_path(base_path),
            'immuneML_version': MLUtil.get_immuneML_version(),
            "analyses": analyses,
        }

        for name, analysis in state.exploratory_analysis_units.items():
            dataset = analysis.dataset
            label_config = analysis.label_config
            encoder = analysis.encoder
            class_name = type(dataset).__name__
            has_encoder = encoder is not None

            entry = {"name": name}
            # Fall back to the identifier when the dataset has no name.
            entry["dataset_name"] = dataset.name if dataset.name is not None else dataset.identifier
            entry["dataset_type"] = StringHelper.camel_case_to_word_string(class_name)
            entry["example_count"] = dataset.get_example_count()
            entry["dataset_size"] = f"{dataset.get_example_count()} {class_name.replace('Dataset', 's').lower()}"
            entry["show_labels"] = label_config is not None and len(label_config.get_labels_by_name()) > 0

            # Note: this is a truthiness check, unlike the `is not None` check above.
            if label_config:
                entry["labels"] = [{"name": label.name, "values": str(label.values)[1:-1]}
                                   for label in label_config.get_label_objects()]
            else:
                entry["labels"] = None

            if has_encoder:
                entry["encoding_key"] = encoder.name
                entry["encoding_name"] = StringHelper.camel_case_to_word_string(type(encoder).__name__)
                # Every instance attribute of the encoder is listed as a parameter.
                entry["encoding_params"] = [{"param_name": key, "param_value": value}
                                            for key, value in vars(encoder).items()]
            else:
                entry["encoding_key"] = None
                entry["encoding_name"] = None
                entry["encoding_params"] = None

            entry["show_encoding"] = has_encoder
            entry["report"] = Util.to_dict_recursive(analysis.report_result, base_path)
            analyses.append(entry)

        # Second pass: the display flags depend on the converted report dicts.
        for entry in analyses:
            report = entry["report"]
            entry["show_tables"] = _has_content(report, "output_tables")
            entry["show_text"] = _has_content(report, "output_text")

        return html_map
Пример #6
0
    def _make_selection(state: TrainMLModelState, assessment_index: int, label: str, base_path):
        """Build the template variables for the selection page of one assessment split and label.

        Args:
            state: training state; its ``assessment_states``, ``selection``,
                ``metrics`` and ``optimization_metric`` are read.
            assessment_index: zero-based index into ``state.assessment_states``.
            label: key used to pick the label state of the assessment split.
            base_path: forwarded to ``Util.to_dict_recursive`` and to the
                per-setting report builder.

        Returns:
            A dict with the per-setting performances on every selection split
            (including an average where one can be computed), the tables for the
            other metrics, the data split reports and the reports per setting.
        """
        import numbers  # local import: only needed to recognise numeric performances

        selection_state = state.assessment_states[assessment_index].label_states[label].selection_state
        split_count = state.selection.split_count
        selection_reports = state.selection.reports

        hp_settings = []
        optimal = selection_state.optimal_hp_setting.get_key()

        for hp_setting, hp_items in selection_state.hp_items.items():
            hp_splits = [HPHTMLBuilder._print_metric(hp_item.performance, state.optimization_metric) for hp_item in hp_items]

            # Only numeric performances can be averaged; anything else that
            # _print_metric may return (e.g. a text placeholder) is skipped.
            # The previous filter `if [isinstance(perf, float)]` tested a
            # one-element list, which is always truthy, so nothing was filtered.
            numeric_performances = [perf for perf in hp_splits if isinstance(perf, numbers.Real)]
            show_average = len(hp_splits) > 1 and len(numeric_performances) > 0

            hp_settings.append({
                "hp_setting": hp_setting,
                "hp_splits": hp_splits,
                "optimal": hp_setting == optimal,
                "average": round(statistics.mean(numeric_performances), HPHTMLBuilder.NUM_DIGITS) if show_average else None,
                "show_average": show_average
            })

        other_metrics = [metric for metric in state.metrics if metric != state.optimization_metric]
        # Other metrics are hidden when the selection uses a random split with training_percentage == 1.
        has_other_metrics = len(other_metrics) > 0 and \
            not (state.selection.split_strategy == SplitType.RANDOM and state.selection.training_percentage == 1)

        has_data_split_reports = len(selection_reports.data_split_reports) > 0
        data_split_reports = None
        if has_data_split_reports:
            train_reports = selection_state.train_data_reports
            val_reports = selection_state.val_data_reports
            # Each side is only shown when it has exactly one entry per split.
            # The 'test' side now checks the validation reports it indexes
            # (it used to check the length of the train reports).
            data_split_reports = [
                {'split_index': index + 1,
                 'train': Util.to_dict_recursive(train_reports[index], base_path)
                 if len(train_reports) == split_count else None,
                 'test': Util.to_dict_recursive(val_reports[index], base_path)
                 if len(val_reports) == split_count else None}
                for index in range(split_count)]

        return {
            "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
            "label": label,
            "assessment_split": assessment_index + 1,
            "splits": [{"split_index": i} for i in range(1, split_count + 1)],
            "split_count": split_count,
            "optimization_metric": state.optimization_metric.name.lower(),
            "has_other_metrics": has_other_metrics,
            "metrics": [{"performance": HPHTMLBuilder._extract_selection_performance_per_metric(selection_state, metric, split_count),
                         "metric": HPHTMLBuilder._get_heading_metric_name(metric.name.lower())}
                        for metric in other_metrics] if has_other_metrics else None,
            "hp_settings": hp_settings,
            "show_average": any(hps["show_average"] for hps in hp_settings),
            "data_split_reports": data_split_reports,
            "has_data_split_reports": has_data_split_reports,
            "has_reports_per_setting": len(selection_reports.encoding_reports) + len(selection_reports.model_reports) > 0,
            "reports_per_setting": [{
                "hp_setting": hp_setting,
                "reports": HPHTMLBuilder._make_selection_reports_for_item_list(hp_items, base_path)
            } for hp_setting, hp_items in selection_state.hp_items.items()]
        }
    def _make_html_map(report_results: dict, result_path: Path, instruction_result_paths: dict) -> dict:
        """Collect the template variables for the multi-dataset benchmark page.

        Args:
            report_results: mapping whose values are converted with
                ``Util.to_dict_recursive``.
            result_path: directory that report and overview paths are made
                relative to.
            instruction_result_paths: mapping from instruction name to the
                directory holding that instruction's ``index.html``.

        Returns:
            A dict with the CSS content, the converted reports, a flag telling
            whether there are any reports, and a link per instruction overview.
        """
        css_style = Util.get_css_content(MultiDatasetBenchmarkHTMLBuilder.CSS_PATH)
        reports = Util.to_dict_recursive(report_results.values(), result_path)
        version = MLUtil.get_immuneML_version()

        overviews = []
        for name, path in instruction_result_paths.items():
            relative_index = Path(os.path.relpath(path / "index.html", result_path))
            overviews.append({"name": name, "path": relative_index})

        return {
            "css_style": css_style,
            "reports": reports,
            'immuneML_version': version,
            # Reports are shown only when the converted collection is non-empty.
            "show_reports": len(reports) != 0,
            "instruction_overviews": overviews,
        }
Пример #8
0
    def _make_main_html_map(state: TrainMLModelState, base_path: Path) -> dict:
        """Collect the template variables for the main hyperparameter optimization page.

        Args:
            state: training state; its dataset, label configuration, metrics,
                assessment/selection descriptions and report results are read.
            base_path: passed to the helpers that build paths and report dicts.

        Returns:
            A dict with the dataset summary, labels, metrics, optional reports
            and the per-label overviews produced by the sibling helpers.
        """
        dataset = state.dataset
        class_name = type(dataset).__name__

        html_map = {}
        html_map["css_style"] = Util.get_css_content(HPHTMLBuilder.CSS_PATH)
        html_map["full_specs"] = Util.get_full_specs_path(base_path)
        # Fall back to the identifier when the dataset has no name.
        html_map["dataset_name"] = dataset.name if dataset.name is not None else dataset.identifier
        html_map["dataset_type"] = StringHelper.camel_case_to_word_string(class_name)
        html_map["example_count"] = dataset.get_example_count()
        html_map["dataset_size"] = f"{dataset.get_example_count()} {class_name.replace('Dataset', 's').lower()}"
        html_map["labels"] = [{"name": label.name, "values": str(label.values)[1:-1]}
                              for label in state.label_configuration.get_label_objects()]
        html_map["optimization_metric"] = state.optimization_metric.name.lower()

        # String form of the list of metric names, without the surrounding
        # brackets and without single quotes.
        metric_names_text = str([metric.name.lower() for metric in state.metrics])
        html_map["other_metrics"] = metric_names_text[1:-1].replace("'", "")
        html_map["metrics"] = [{"name": metric.name.lower()} for metric in state.metrics]

        html_map["assessment_desc"] = state.assessment
        html_map["selection_desc"] = state.selection
        html_map["show_hp_reports"] = bool(state.report_results)
        if state.report_results:
            html_map['hp_reports'] = Util.to_dict_recursive(state.report_results, base_path)
        else:
            html_map['hp_reports'] = None
        html_map["hp_per_label"] = HPHTMLBuilder._make_hp_per_label(state)
        html_map['models_per_label'] = HPHTMLBuilder._make_model_per_label(state, base_path)
        html_map['immuneML_version'] = MLUtil.get_immuneML_version()

        return html_map
Пример #9
0
    def _make_assessment_pages(state: TrainMLModelState, base_path: Path, label: str):
        """Build one template-variable dict per assessment split for the given label.

        Args:
            state: training state; ``assessment_states``, ``assessment``,
                ``optimization_metric`` and ``name`` are read.
            base_path: directory that metadata links are made relative to; also
                forwarded to the report helpers.
            label: key into each assessment state's ``label_states``.

        Returns:
            A list with one dict per entry of ``state.assessment_states``.
        """
        pages = []

        for assessment_index, assessment_state in enumerate(state.assessment_states):
            assessment_reports = state.assessment.reports

            page = {
                "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
                "optimization_metric": state.optimization_metric.name.lower(),
                "split_index": assessment_state.split_index + 1,
                "hp_settings": [],
                "has_reports": len(assessment_reports.model_reports) + len(assessment_reports.encoding_reports) > 0,
                "train_data_reports": Util.to_dict_recursive(assessment_state.train_val_data_reports, base_path),
                "test_data_reports": Util.to_dict_recursive(assessment_state.test_data_reports, base_path),
                "show_data_reports": len(assessment_state.train_val_data_reports) > 0 or len(assessment_state.test_data_reports) > 0,
            }

            # Metadata link and table are added only when the dataset has a
            # metadata file; otherwise only the path key is set (to None).
            page["train_metadata_path"] = None
            train_metadata_file = getattr(assessment_state.train_val_dataset, "metadata_file", None)
            if train_metadata_file is not None:
                page["train_metadata_path"] = os.path.relpath(str(train_metadata_file), str(base_path))
                page["train_metadata"] = Util.get_table_string_from_csv(train_metadata_file)

            page["test_metadata_path"] = None
            test_metadata_file = getattr(assessment_state.test_dataset, "metadata_file", None)
            if test_metadata_file is not None:
                page['test_metadata_path'] = os.path.relpath(test_metadata_file, base_path)
                page["test_metadata"] = Util.get_table_string_from_csv(test_metadata_file)

            page["label"] = label

            label_state = assessment_state.label_states[label]
            for hp_setting, item in label_state.assessment_items.items():
                # Looked up per item, so nothing is read when there are no items.
                optimal_key = str(label_state.optimal_hp_setting.get_key())
                reports_path = HPHTMLBuilder._make_assessment_reports(state, assessment_index, hp_setting, assessment_state, label,
                                                                      base_path)
                setting_key = str(hp_setting)
                page["hp_settings"].append({
                    "optimal": setting_key == optimal_key,
                    "hp_setting": setting_key,
                    "optimization_metric_val": HPHTMLBuilder._print_metric(item.performance, state.optimization_metric),
                    "reports_path": reports_path,
                })

            page["show_non_optimal"] = len(page["hp_settings"]) > 1
            page["selection_path"] = HPHTMLBuilder._make_selection_split_path(assessment_index, label, state.name)
            page['performances_per_metric'] = HPHTMLBuilder._extract_assessment_performances_per_metric(state, assessment_state, label)

            pages.append(page)

        return pages
Пример #10
0
    def _extract_selection_performance_per_metric(selection_state: HPSelectionState, metric: Metric, split_count):
        """Render the performance of every setting on every selection split as a table string.

        Args:
            selection_state: its ``hp_items`` mapping (setting -> list of items,
                one per split) is read.
            metric: metric passed to ``HPHTMLBuilder._print_metric`` for each item.
            split_count: number of "split N" columns to create.

        Returns:
            The result of ``Util.get_table_string_from_csv_string`` for a
            tab-separated table with one row per setting.
        """
        columns = {"setting": []}
        for split_number in range(1, split_count + 1):
            columns[f"split {split_number}"] = []

        for hp_setting, hp_item_list in selection_state.hp_items.items():
            columns['setting'].append(str(hp_setting))
            for split_number, hp_item in enumerate(hp_item_list, start=1):
                columns[f'split {split_number}'].append(HPHTMLBuilder._print_metric(hp_item.performance, metric))

        table = pd.DataFrame(columns)
        table = table.rename(columns={"setting": 'Hyperparameter settings (preprocessing, encoding, ML method)'})

        # Serialize to an in-memory tab-separated text before handing it over.
        buffer = io.StringIO()
        table.to_csv(buffer, sep="\t", index=False)
        return Util.get_table_string_from_csv_string(buffer.getvalue(), separator="\t")
Пример #11
0
    def _extract_assessment_performances_per_metric(state: TrainMLModelState, assessment_state: HPAssessmentState, label: str) -> str:
        """Render the performance of every setting for every metric as a table string.

        Args:
            state: its ``metrics`` define the table columns (lower-cased names).
            assessment_state: ``label_states[label].assessment_items`` is read as
                a mapping from setting to item.
            label: key into ``assessment_state.label_states``.

        Returns:
            The result of ``Util.get_table_string_from_csv_string`` for a
            tab-separated table with one row per setting.
        """
        columns = {"setting": []}
        for metric in state.metrics:
            columns[metric.name.lower()] = []

        assessment_items = assessment_state.label_states[label].assessment_items
        for hp_setting, hp_item in assessment_items.items():
            columns['setting'].append(str(hp_setting))
            for metric in sorted(state.metrics, key=lambda m: m.name.lower()):
                columns[metric.name.lower()].append(HPHTMLBuilder._print_metric(hp_item.performance, metric))

        table = pd.DataFrame(columns)
        table = table.rename(columns={"setting": 'Hyperparameter settings (preprocessing, encoding, ML method)'})

        # Serialize to an in-memory tab-separated text before handing it over.
        buffer = io.StringIO()
        table.to_csv(buffer, sep="\t", index=False)
        return Util.get_table_string_from_csv_string(buffer.getvalue(), separator="\t")
Пример #12
0
    def _make_assessment_reports(state, i, hp_setting_key, assessment_state,
                                 label, base_path: Path):
        """Write the reports page for one setting of one assessment split, if it has reports.

        Args:
            state: its ``name`` is used in the output file name.
            i: zero-based assessment split index (shown one-based).
            hp_setting_key: key into the label state's ``assessment_items``.
            assessment_state: its ``label_states[label]`` is read.
            label: label name; used for the lookup and in the file name.
            base_path: directory where the page is written; also forwarded to
                ``Util.to_dict_recursive``.

        Returns:
            The file name of the written page, or ``None`` when the item has
            neither encoding nor ML model reports (nothing is written then).
        """

        def _converted_or_none(report_results):
            # Conversion is only attempted when there is at least one result.
            if len(report_results) > 0:
                return Util.to_dict_recursive(report_results, base_path)
            return None

        report_file = base_path / f"{state.name}_{label}_{hp_setting_key}_assessment_reports_split_{i + 1}.html"
        hp_item = assessment_state.label_states[label].assessment_items[hp_setting_key]

        train_results = hp_item.encoding_train_results
        test_results = hp_item.encoding_test_results
        model_results = hp_item.model_report_results

        data = {
            "split_index": i + 1,
            "hp_setting": hp_setting_key,
            "label": label,
            "css_style": Util.get_css_content(HPHTMLBuilder.CSS_PATH),
            "has_encoding_reports": len(train_results) > 0 or len(test_results) > 0,
            "has_ml_reports": len(model_results) > 0,
            "encoding_train_reports": _converted_or_none(train_results),
            "encoding_test_reports": _converted_or_none(test_results),
            "ml_reports": _converted_or_none(model_results),
        }

        if not (data["has_ml_reports"] or data["has_encoding_reports"]):
            return None

        TemplateParser.parse(template_path=EnvironmentSettings.html_templates_path / "Reports.html",
                             template_map=data,
                             result_path=report_file)
        return report_file.name
Пример #13
0
    def make_html_map(state: SimulationState, base_path: Path) -> dict:
        """Collect the template variables for the simulation result page.

        Args:
            state: simulation state; its ``name``, ``resulting_dataset``,
                ``formats``, ``paths``, ``result_path`` and
                ``simulation.implantings`` are read.
            base_path: directory the download links are made relative to; also
                passed to the ``Util`` helpers.

        Returns:
            A dict with the dataset summary, labels, per-format download links
            and the converted implantings.
        """
        dataset = state.resulting_dataset
        class_name = type(dataset).__name__

        html_map = {}
        html_map["css_style"] = Util.get_css_content(SimulationHTMLBuilder.CSS_PATH)
        html_map["name"] = state.name
        html_map['immuneML_version'] = MLUtil.get_immuneML_version()
        html_map["full_specs"] = Util.get_full_specs_path(base_path)
        # Fall back to the identifier when the dataset has no name.
        html_map["dataset_name"] = dataset.name if dataset.name is not None else dataset.identifier
        html_map["dataset_type"] = StringHelper.camel_case_to_word_string(class_name)
        html_map["example_count"] = dataset.get_example_count()
        html_map["dataset_size"] = f"{dataset.get_example_count()} {class_name.replace('Dataset', 's').lower()}"
        html_map["labels"] = [{"label_name": label} for label in dataset.get_label_names()]

        download_links = []
        for format_name in state.formats:
            # Zip the exported files for this format, then link to the archive
            # relative to base_path.
            zip_path = Util.make_downloadable_zip(state.result_path, state.paths[dataset.name][format_name])
            download_links.append({
                "format_name": format_name,
                "dataset_download_link": os.path.relpath(path=zip_path, start=base_path),
            })
        html_map["formats"] = download_links

        implantings = []
        for implanting in state.simulation.implantings:
            implantings.append(Util.to_dict_recursive(implanting, base_path))
        html_map["implantings"] = implantings

        return html_map
Пример #14
0
    def _make_document(presentations: List[InstructionPresentation], path: Path) -> Path:
        """Create the top-level ``index.html`` for the given instruction presentations.

        Args:
            presentations: presentations to include; with exactly one, its
                ``path`` is copied to the index location.
            path: directory in which ``index.html`` is created.

        Returns:
            The path of the created ``index.html``, or ``None`` when
            ``presentations`` is empty.
        """
        index_page = path / "index.html"
        presentation_count = len(presentations)

        if presentation_count == 0:
            return None

        if presentation_count == 1:
            # A single instruction: its page becomes the index, after which the
            # paths inside the copy are updated.
            shutil.copyfile(str(presentations[0].path), str(index_page))
            HTMLBuilder._update_paths(index_page)
            return index_page

        # Several instructions: render an overview page from the index template.
        templates_dir = EnvironmentSettings.html_templates_path
        html_map = {
            "instructions": presentations,
            "css_path": templates_dir / "css/custom.css",
            "full_specs": Util.get_full_specs_path(path),
            'immuneML_version': MLUtil.get_immuneML_version(),
        }
        TemplateParser.parse(template_path=templates_dir / "index.html",
                             template_map=html_map, result_path=index_page)
        return index_page
Пример #15
0
    def make_html_map(state: SubsamplingState, base_path: Path) -> dict:
        """Collect the template variables for the subsampling result page.

        Args:
            state: subsampling state; its ``name``, ``dataset``,
                ``subsampled_datasets`` and ``subsampled_dataset_paths`` are read.
            base_path: passed to ``Util.get_full_specs_path``.

        Returns:
            A dict with the original dataset's summary and one entry per
            subsampled dataset (numbered from 1) with its download links.
        """
        dataset = state.dataset

        html_map = {}
        html_map["css_style"] = Util.get_css_content(SubsamplingHTMLBuilder.CSS_PATH)
        html_map["name"] = state.name
        html_map['immuneML_version'] = MLUtil.get_immuneML_version()
        html_map["full_specs"] = Util.get_full_specs_path(base_path)
        # Fall back to the identifier when the dataset has no name.
        html_map["dataset_name"] = dataset.name if dataset.name is not None else dataset.identifier
        html_map["labels"] = [{"label_name": label} for label in dataset.get_label_names()]
        html_map["dataset_type"] = StringHelper.camel_case_to_word_string(type(dataset).__name__)
        html_map["example_count"] = dataset.get_example_count()

        subsampled_entries = []
        for position, subsampled in enumerate(state.subsampled_datasets, 1):
            class_name = type(subsampled).__name__
            entry = {
                "sub_dataset_iter": position,
                "sub_dataset_name": subsampled.name,
                "dataset_size": f"{subsampled.get_example_count()} {class_name.replace('Dataset', 's').lower()}",
            }
            # The stored paths are used as download links as-is, keyed by format name.
            format_paths = state.subsampled_dataset_paths[subsampled.name]
            entry["formats"] = [{"dataset_download_link": item, "format_name": key}
                                for key, item in format_paths.items()]
            subsampled_entries.append(entry)
        html_map["subsampled_datasets"] = subsampled_entries

        return html_map
Пример #16
0
    def _move_reports_recursive(obj, path: Path):
        """Update report paths on ``obj`` and on the state objects nested inside it.

        ``obj`` may be a plain object (its instance attributes are inspected) or
        a dict (its items are inspected). Depending on the type of each value,
        the work is done here or delegated to the sibling ``_process_*`` helpers.

        Args:
            obj: object or dict to process; modified in place.
            path: forwarded to ``Util.update_report_paths`` and the helpers.

        Returns:
            The processed ``obj`` (or whatever the delegating helpers returned
            in its place).
        """

        def _assign(target, name, value):
            # Dicts have to be written by key; setattr() on a dict raises
            # AttributeError, which made dict inputs fail on the write paths.
            if isinstance(target, dict):
                target[name] = value
            else:
                setattr(target, name, value)

        # Snapshot the names so that writing back never disturbs the iteration.
        attribute_names = list(obj) if isinstance(obj, dict) else list(vars(obj))

        for attribute in attribute_names:
            attribute_value = obj[attribute] if isinstance(obj, dict) else getattr(obj, attribute)

            if isinstance(attribute_value, list) and all(isinstance(item, ReportResult) for item in attribute_value):
                # A list of report results (an empty list also takes this branch).
                updated_reports = [Util.update_report_paths(report_result, path) for report_result in attribute_value]
                _assign(obj, attribute, updated_reports)
            elif isinstance(attribute_value, list) and all(isinstance(item, HPAssessmentState) for item in attribute_value):
                obj = HPHTMLBuilder._process_list_recursively(obj, attribute, attribute_value, path)
            elif isinstance(attribute_value, dict) and all(
                    isinstance(item, (HPLabelState, HPItem)) for item in attribute_value.values()):
                obj = HPHTMLBuilder._process_dict_recursive(obj, attribute, attribute_value, path)
            elif isinstance(attribute_value, dict) and all(isinstance(item, list) for item in attribute_value.values()) and all(
                    all(isinstance(item, HPItem) for item in item_list) for item_list in attribute_value.values()):
                # A dict mapping to lists of HPItem objects.
                obj = HPHTMLBuilder._process_hp_items(obj, attribute, attribute_value, path)
            elif isinstance(attribute_value, HPSelectionState):
                _assign(obj, attribute, HPHTMLBuilder._move_reports_recursive(attribute_value, path))

        return obj