def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> DatasetExportInstruction:
    """Build a DatasetExportInstruction from its specification.

    Validates the instruction keys, checks that the requested export formats
    correspond to existing non-abstract DataExporter subclasses, and that the
    referenced dataset names exist in the symbol table.

    Args:
        key: name of the instruction in the specification.
        instruction: raw specification dict for this instruction.
        symbol_table: table holding previously parsed datasets.
        path: unused here; kept for interface compatibility with other parsers.

    Returns:
        DatasetExportInstruction with resolved dataset objects and exporter classes.
    """
    location = "DatasetExportParser"
    ParameterValidator.assert_keys(list(instruction.keys()), DatasetExportParser.VALID_KEYS, location, key)

    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", 'dataset_export/')
    ParameterValidator.assert_all_in_valid_list(instruction["export_formats"], valid_formats, location, "export_formats")
    ParameterValidator.assert_all_in_valid_list(instruction["datasets"], symbol_table.get_keys_by_type(SymbolType.DATASET),
                                                location, "datasets")

    # FIX: the original comprehension reused the name `key` as its loop variable,
    # shadowing the instruction-name parameter that is passed as `name=key` just
    # below. Harmless in Python 3 (comprehensions scope their variable) but
    # confusing and fragile; renamed to `export_format`.
    return DatasetExportInstruction(
        datasets=[symbol_table.get(dataset_key) for dataset_key in instruction["datasets"]],
        exporters=[ReflectionHandler.get_class_by_name(f"{export_format}Exporter", "dataset_export/")
                   for export_format in instruction["export_formats"]],
        name=key)
def _parse_motif(key: str, motif_item: dict) -> Motif:
    """Construct a Motif named `key` from its specification dict.

    Resolves the instantiation strategy object, converts optional chain names
    to Chain enum members, and checks that either a single `seed` or the full
    set of paired-chain parameters is provided.
    """
    params = copy.deepcopy(motif_item)  # keep the caller's dict untouched

    strategy_names = ReflectionHandler.all_nonabstract_subclass_basic_names(
        MotifInstantiationStrategy, "Instantiation", "motif_instantiation_strategy/")
    params["instantiation"] = ObjectParser.parse_object(
        motif_item["instantiation"], strategy_names, "Instantiation",
        "motif_instantiation_strategy", "MotifParser", key)
    params["identifier"] = key

    # optional chain names are mapped to Chain enum members (case-insensitive)
    for chain_key in ("name_chain1", "name_chain2"):
        if chain_key in motif_item:
            params[chain_key] = Chain[motif_item[chain_key].upper()]

    assert "seed" in params or all(el in params for el in ["name_chain1", "name_chain2", "seed_chain1", "seed_chain2"]), \
        "MotifParser: please check the documentation for motif definition. Either parameter `seed` has to be set (for simulation in single " \
        "chain data) or all of the parameters `name_chain1`, `name_chain2`, `seed_chain1`, `seed_chain2` (for simulation for paired chain data)."

    return Motif(**params)
def _parse_ml_method(ml_method_id: str, ml_specification) -> tuple:
    """Parse one ML method specification into a method instance plus its resolved spec.

    A bare string spec (just the method class name) is normalized to a dict first.
    Default model-selection parameters are merged in, the single method-class key
    is identified, its own defaults merged, and the instance created via MLParser.

    Returns:
        tuple of (method instance with `.name` set to ml_method_id, resolved spec dict).
    """
    valid_class_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MLMethod, "", "ml_methods/")

    # a plain string means "this method class with default params"
    if type(ml_specification) is str:
        ml_specification = {ml_specification: {}}

    # defaults first so user-specified keys win in the merge
    ml_specification = {**DefaultParamsLoader.load("ml_methods/", "MLMethod"), **ml_specification}
    ml_specification_keys = list(ml_specification.keys())
    ParameterValidator.assert_all_in_valid_list(list(ml_specification_keys),
                                                ["model_selection_cv", "model_selection_n_folds"] + valid_class_values,
                                                "MLParser", ml_method_id)

    # keys other than the two model-selection settings must be method class names
    non_default_keys = [key for key in ml_specification.keys() if key not in ["model_selection_cv", "model_selection_n_folds"]]

    # NOTE(review): the check enforces exactly 3 keys (2 defaults + exactly 1 method
    # name), but the message says "at least 1 key" — wording and check disagree; confirm
    # whether multiple method names per id were ever meant to be supported.
    assert len(ml_specification_keys) == 3, f"MLParser: ML method {ml_method_id} was not correctly specified. Expected at least 1 key " \
                                            f"(ML method name), got {len(ml_specification_keys) - 2} instead: " \
                                            f"{str([key for key in non_default_keys])[1:-1]}."

    ml_method_class_name = non_default_keys[0]
    ml_method_class = ReflectionHandler.get_class_by_name(ml_method_class_name, "ml_methods/")

    # merge per-class defaults under the user's overrides for this method
    ml_specification[ml_method_class_name] = {**DefaultParamsLoader.load("ml_methods/", ml_method_class_name,
                                                                         log_if_missing=False),
                                              **ml_specification[ml_method_class_name]}

    method, params = MLParser.create_method_instance(ml_specification, ml_method_class, ml_method_id)
    ml_specification[ml_method_class_name] = params  # store the fully-resolved params back
    method.name = ml_method_id

    return method, ml_specification
def get_documentation():
    """Return the SubsamplingInstruction docstring with the exporter placeholder expanded."""
    exporter_names = ReflectionHandler.all_nonabstract_subclass_basic_names(
        DataExporter, "Exporter", "dataset_export/")
    # render as `Name1`, `Name2`, ... for the generated docs
    formatted_names = str(exporter_names)[1:-1].replace("'", "`")
    replacements = {
        "Valid formats are class names of any non-abstract class inheriting "
        ":py:obj:`~source.IO.dataset_export.DataExporter.DataExporter`.": f"Valid values are: {formatted_names}."
    }
    return update_docs_per_mapping(str(SubsamplingInstruction.__doc__), replacements)
def get_documentation():
    """Return the Signal docstring with the implanting-strategy placeholder expanded."""
    initial_doc = str(Signal.__doc__)
    valid_implanting_values = str(ReflectionHandler.all_nonabstract_subclass_basic_names(
        SignalImplantingStrategy, 'Implanting', 'signal_implanting_strategy/'))[1:-1].replace("'", "`")
    docs_mapping = {
        "Valid values for this argument are class names of different signal implanting strategies.":
        # FIX: trailing period added so the generated sentence matches the other
        # get_documentation() mappings in this codebase, which all end with ".".
            f"Valid values are: {valid_implanting_values}."
    }
    doc = update_docs_per_mapping(initial_doc, docs_mapping)
    return doc
def _parse_report(key: str, params: dict, symbol_table: SymbolTable):
    """Parse one report specification and register the built report under `key`.

    Returns the updated symbol table together with the resolved parameter dict.
    """
    report_class_names = ReflectionHandler.all_nonabstract_subclass_basic_names(Report, "", "reports/")
    report, resolved_params = ObjectParser.parse_object(params, report_class_names, "", "reports/",
                                                        "ReportParser", key, builder=True, return_params_dict=True)
    symbol_table.add(key, SymbolType.REPORT, report)
    return symbol_table, resolved_params
def get_documentation():
    """Return the Motif docstring with instantiation-strategy and chain placeholders expanded."""
    doc = str(Motif.__doc__)

    strategy_names = ReflectionHandler.all_nonabstract_subclass_basic_names(
        MotifInstantiationStrategy, "Instantiation", "motif_instantiation_strategy/")
    strategy_values = str(strategy_names)[1:-1].replace("'", "`")

    # NOTE(review): this stringifies the Chain members themselves, so the docs show
    # each member's repr/str; if plain member names were intended, `member.name`
    # would be needed — confirm against Chain's definition.
    chain_values = str([member for member in Chain])[1:-1].replace("'", "`")

    mapping = {
        "It should be one of the classes inheriting MotifInstantiationStrategy.":
            f"Valid values are: {strategy_values}.",
        "The value should be an instance of :py:obj:`~source.data_model.receptor.receptor_sequence.Chain.Chain`.":
            f"Valid values are: {chain_values}."
    }
    return update_docs_per_mapping(doc, mapping)
def _parse_sequence(key: str, preproc_sequence: list, symbol_table: SymbolTable) -> SymbolTable:
    """Parse a list of preprocessing steps and register the sequence under `key`.

    Args:
        key: name of the preprocessing sequence in the specification.
        preproc_sequence: list of single-entry dicts, each mapping a step name
            to its specification.
        symbol_table: table to register the parsed sequence in.

    Returns:
        the symbol table with the preprocessing sequence added.
    """
    sequence = []
    valid_preprocessing_classes = ReflectionHandler.all_nonabstract_subclass_basic_names(Preprocessor, "", "preprocessing/")
    for item in preproc_sequence:
        for step_key, step in item.items():
            obj, params = ObjectParser.parse_object(step, valid_preprocessing_classes, "", "preprocessing/",
                                                    "PreprocessingParser", step_key, True, True)
            # FIX: the original had `step = params` here, which only rebound the loop
            # variable and had no effect — the resolved params were silently discarded.
            # The dead assignment is removed. (If the intent was to write the resolved
            # params back into the spec, that would be `item[step_key] = params` —
            # confirm with callers before adding.)
            sequence.append(obj)
    symbol_table.add(key, SymbolType.PREPROCESSING, sequence)
    return symbol_table
def parse_encoder(key: str, specs: dict):
    """Resolve the encoder class named in `specs` and validate the supplied parameters.

    Returns the encoder class together with its resolved parameter dict.
    """
    class_path = "encodings"
    encoder_names = ReflectionHandler.all_nonabstract_subclass_basic_names(DatasetEncoder, "Encoder", class_path)
    encoder = ObjectParser.get_class(specs, encoder_names, "Encoder", class_path, "EncodingParser", key)

    # [:-7] strips the "Encoder" suffix to obtain the short name used for defaults
    params = ObjectParser.get_all_params(specs, class_path, encoder.__name__[:-7], key)

    # every supplied parameter must be accepted by the encoder's constructor
    accepted_params = [name for name in inspect.signature(encoder.__init__).parameters if name != "self"]
    ParameterValidator.assert_all_in_valid_list(params.keys(), accepted_params, "EncoderParser",
                                                f"{key}/{encoder.__name__.replace('Encoder', '')}")
    return encoder, params
def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> SubsamplingInstruction:
    """Validate a subsampling instruction spec and build the SubsamplingInstruction.

    Checks the instruction keys, that the referenced dataset exists, that the
    requested subsample sizes are ints within the dataset's example count, and
    that the export formats name existing DataExporter subclasses.
    """
    valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
    ParameterValidator.assert_keys(instruction.keys(), valid_keys, SubsamplingParser.__name__, key)

    dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
    ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys, SubsamplingParser.__name__, f'{key}/dataset')
    dataset = symbol_table.get(instruction['dataset'])

    sizes = instruction['subsampled_dataset_sizes']
    ParameterValidator.assert_type_and_value(sizes, list, SubsamplingParser.__name__, f'{key}/subsampled_dataset_sizes')
    ParameterValidator.assert_all_type_and_value(sizes, int, SubsamplingParser.__name__, f'{key}/subsampled_dataset_sizes',
                                                 1, dataset.get_example_count())

    export_formats = instruction['dataset_export_formats']
    valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
    ParameterValidator.assert_type_and_value(export_formats, list, SubsamplingParser.__name__, f"{key}/dataset_export_formats")
    ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats, SubsamplingParser.__name__,
                                                f"{key}/dataset_export_formats")

    exporter_classes = [ReflectionHandler.get_class_by_name(fmt + "Exporter", "dataset_export/") for fmt in export_formats]
    return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=sizes,
                                  dataset_export_formats=exporter_classes, name=key)
def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: str) -> SymbolTable:
    """Validate, import and register one dataset from its specification.

    Resolves the import class from the `format` field, prepares the import
    params, validates repertoire/receptor-specific requirements, and runs the
    import, wrapping failures with context about which dataset broke.

    Returns:
        the symbol table with the imported dataset added under `key`.
    """
    location = "ImportParser"
    ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False)

    valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/")
    ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format")

    # e.g. format "AIRR" resolves to class AIRRImport
    import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"]))
    params = ImportParser._prepare_params(dataset_specs, result_path, key)

    if "is_repertoire" in params:
        ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire")

        if params["is_repertoire"] == True:
            # repertoire datasets require a metadata file, except for iReceptor imports
            if import_cls != IReceptorImport:
                assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                ParameterValidator.assert_type_and_value(params["metadata_file"], str, location, "metadata_file")

        if params["is_repertoire"] == False:
            # sequence/receptor datasets: `paired` is mandatory; paired data also
            # needs a `receptor_chains` value matching a known ChainPair combination
            assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
            ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired")

            if params["paired"] == True:
                assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair],
                                                        location, "receptor_chains")

    try:
        dataset = import_cls.import_dataset(params, key)
        dataset.name = key
        symbol_table.add(key, SymbolType.DATASET, dataset)
    except KeyError as key_error:
        # a KeyError here typically means a missing spec argument or input-file column
        raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                       f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                       f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                       f"an input data file. ")
    except Exception as ex:
        # re-raise with dataset context; the original error text is preserved
        raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.")

    return symbol_table
def get_documentation():
    """Return the TrainMLModelInstruction docstring with metric and strategy placeholders expanded."""
    doc = str(TrainMLModelInstruction.__doc__)

    metric_names = str([metric.name.lower() for metric in Metric])[1:-1].replace("'", "`")
    strategy_names = str(ReflectionHandler.all_nonabstract_subclass_basic_names(
        HPOptimizationStrategy, "", "hyperparameter_optimization/strategy/"))[1:-1].replace("'", "`")

    # maps internal attribute phrasing / placeholders to user-facing docs text
    mapping = {
        "dataset (Dataset)": "dataset",
        "hp_strategy (HPOptimizationStrategy)": "strategy",
        "hp_settings": "settings",
        "assessment (SplitConfig)": "assessment",
        "selection (SplitConfig)": "selection",
        "optimization_metric (Metric)": "optimization_metric",
        "label_configuration (LabelConfiguration)": "labels (list)",
        "data_reports": "reports",
        "a list of metrics": f"a list of metrics ({metric_names})",
        "a metric to use for optimization": f"a metric to use for optimization (one of {metric_names})",
        "Valid values are objects of any class inheriting :py:obj:`~source.hyperparameter_optimization.strategy."
        "HPOptimizationStrategy.HPOptimizationStrategy`.": f"Valid values are: {strategy_names}.",
        "the reports to be specified here have to be :py:obj:`source.reports.train_ml_model_reports.TrainMLModelReport.TrainMLModelReport` reports.":
            f"the reports that can be provided here are :ref:`{TrainMLModelReport.get_title()}`."
    }
    return update_docs_per_mapping(doc, mapping)