Exemplo n.º 1
0
    def _parse_ml_method(ml_method_id: str, ml_specification) -> tuple:
        """Build an ML method instance from its specification.

        Args:
            ml_method_id: identifier of the ML method; it is used in validation
                messages and assigned to the created method as its name.
            ml_specification: either the ML method class name as a string
                (shorthand for "no explicit parameters") or a dict mapping the
                class name to its parameters, optionally together with
                "model_selection_cv" and "model_selection_n_folds".

        Returns:
            A ``(method, ml_specification)`` tuple. The specification is the
            input merged with the loaded defaults, with the method's entry
            replaced by the params returned from
            ``MLParser.create_method_instance``.

        Raises:
            AssertionError: if the merged specification does not consist of
                exactly three keys (presumably the two model selection keys
                plus one ML method name -- this relies on the loaded defaults
                providing both model selection keys; confirm).
        """
        model_selection_keys = ["model_selection_cv", "model_selection_n_folds"]
        valid_class_values = ReflectionHandler.all_nonabstract_subclass_basic_names(MLMethod, "", "ml_methods/")

        # A bare class name means "use this method without explicit parameters".
        if isinstance(ml_specification, str):
            ml_specification = {ml_specification: {}}

        # User-provided entries take precedence over the loaded defaults.
        ml_specification = {**DefaultParamsLoader.load("ml_methods/", "MLMethod"), **ml_specification}
        ml_specification_keys = list(ml_specification.keys())

        ParameterValidator.assert_all_in_valid_list(ml_specification_keys, model_selection_keys + valid_class_values,
                                                    "MLParser", ml_method_id)

        non_default_keys = [key for key in ml_specification_keys if key not in model_selection_keys]

        assert len(ml_specification_keys) == 3, f"MLParser: ML method {ml_method_id} was not correctly specified. Expected exactly 1 key " \
                                                f"(ML method name), got {len(non_default_keys)} instead: " \
                                                f"{str(non_default_keys)[1:-1]}."

        ml_method_class_name = non_default_keys[0]
        ml_method_class = ReflectionHandler.get_class_by_name(ml_method_class_name, "ml_methods/")

        # Fill in the method-specific defaults underneath the user's parameters.
        ml_specification[ml_method_class_name] = {**DefaultParamsLoader.load("ml_methods/", ml_method_class_name, log_if_missing=False),
                                                  **ml_specification[ml_method_class_name]}

        method, params = MLParser.create_method_instance(ml_specification, ml_method_class, ml_method_id)
        ml_specification[ml_method_class_name] = params
        method.name = ml_method_id

        return method, ml_specification
Exemplo n.º 2
0
    def parse_object(specs,
                     valid_class_names: list,
                     class_name_ending: str,
                     class_path: str,
                     location: str,
                     key: str,
                     builder: bool = False,
                     return_params_dict: bool = False):
        """Instantiate the object described by ``specs``.

        The class name is extracted from ``specs`` with
        ``ObjectParser.get_class_name``, checked against ``valid_class_names``,
        resolved to a class named ``<class_name><class_name_ending>`` under
        ``class_path`` and instantiated with the params obtained from
        ``ObjectParser.get_all_params``.

        Args:
            specs: specification of the object to create.
            valid_class_names: class names accepted for this object.
            class_name_ending: suffix appended to the class name for lookup.
            class_path: location passed to the class lookup and params loading.
            location: name of the calling parser, used in error messages.
            key: key under which the object was specified.
            builder: if True and the class has ``build_object``, that is called
                instead of the constructor.
            return_params_dict: if True, also return ``{class_name: params}``.

        Returns:
            The created object, or ``(object, {class_name: params})`` when
            ``return_params_dict`` is True.

        Raises:
            AssertionError: if creating the object raises a ``TypeError``
                (e.g. an unexpected parameter name); the original error is
                attached as the cause.
        """
        class_name = ObjectParser.get_class_name(specs, valid_class_names,
                                                 class_name_ending, location,
                                                 key)
        ParameterValidator.assert_in_valid_list(class_name, valid_class_names,
                                                location, key)

        cls = ReflectionHandler.get_class_by_name(
            f"{class_name}{class_name_ending}", class_path)
        params = ObjectParser.get_all_params(specs, class_path, class_name,
                                             key)

        init_param_names = list(inspect.signature(cls.__init__).parameters.keys())

        try:
            if "name" not in init_param_names:
                # "name" may be absent from params; pop() avoids an uncaught
                # KeyError in that case.
                params.pop("name", None)
            if builder and hasattr(cls, "build_object"):
                obj = cls.build_object(**params)
            else:
                obj = cls(**params)
        except TypeError as err:
            raise AssertionError(
                f"{location}: invalid parameter {err.args[0]} when specifying parameters in {specs} "
                f"under key {key}. Valid parameter names are: "
                f"{init_param_names}"
            ) from err

        return (obj, {class_name: params}) if return_params_dict else obj
Exemplo n.º 3
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: str = None) -> DatasetExportInstruction:
        """Create a DatasetExportInstruction from its specification.

        The instruction keys are checked against
        ``DatasetExportParser.VALID_KEYS``, the requested export formats
        against the discovered exporter names and the requested datasets
        against the dataset keys registered in ``symbol_table``.

        Args:
            key: name of the instruction; becomes the instruction's name.
            instruction: specification with "export_formats" and "datasets".
            symbol_table: table the referenced datasets are read from.
            path: not used by this method.

        Returns:
            The instruction holding the datasets and the exporter classes.
        """
        location = "DatasetExportParser"
        ParameterValidator.assert_keys(list(instruction.keys()), DatasetExportParser.VALID_KEYS, location, key)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, "Exporter", 'dataset_export/')
        export_formats = instruction["export_formats"]
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_formats, location, "export_formats")

        dataset_keys = instruction["datasets"]
        ParameterValidator.assert_all_in_valid_list(dataset_keys, symbol_table.get_keys_by_type(SymbolType.DATASET),
                                                    location, "datasets")

        datasets = [symbol_table.get(dataset_key) for dataset_key in dataset_keys]
        exporters = [ReflectionHandler.get_class_by_name(f"{export_format}Exporter", "dataset_export/")
                     for export_format in export_formats]

        return DatasetExportInstruction(datasets=datasets, exporters=exporters, name=key)
Exemplo n.º 4
0
 def _parse_to_enum_instances(params, location):
     """Replace values in ``params`` by members of matching classes.

     For every key, the key is converted to camel case; if a class with that
     name exists under ``location``, the value is upper-cased and used to
     index that class (presumably an Enum -- confirm). ``params`` is modified
     in place and also returned.
     """
     # Snapshot the keys up front since values are reassigned inside the loop.
     for param_name in list(params.keys()):
         candidate_name = DefaultParamsLoader._convert_to_camel_case(param_name)
         if not ReflectionHandler.exists(candidate_name, location):
             continue
         member_cls = ReflectionHandler.get_class_by_name(candidate_name, location)
         params[param_name] = member_cls[params[param_name].upper()]
     return params
Exemplo n.º 5
0
    def import_hp_setting(config_dir: str) -> Tuple[HPSetting, Label]:
        """Restore an HPSetting and its label from a configuration directory.

        Reads ``ml_config.yaml`` from ``config_dir`` (the file name is appended
        directly, so ``config_dir`` is expected to end with a path separator),
        then loads the ML method, the encoder and the preprocessing sequence
        it refers to.

        Args:
            config_dir: directory holding the stored configuration and models.

        Returns:
            A ``(HPSetting, Label)`` tuple.

        Raises:
            AssertionError: if the configuration does not define exactly one
                label.
        """
        config = MLMethodConfiguration()
        config.load(f'{config_dir}ml_config.yaml')

        ml_method_cls = ReflectionHandler.get_class_by_name(config.ml_method, 'ml_methods/')
        ml_method = ml_method_cls()
        ml_method.load(config_dir)

        encoder = MLImport.import_encoder(config, config_dir)
        preprocessing_sequence = MLImport.import_preprocessing_sequence(config, config_dir)

        label_names = list(config.labels_with_values.keys())
        assert len(label_names) == 1, "MLImport: Multiple labels set in a single ml_config file."

        label_name = label_names[0]
        label = Label(label_name, config.labels_with_values[label_name])

        hp_setting = HPSetting(encoder=encoder,
                               encoder_params=config.encoding_parameters,
                               encoder_name=config.encoding_name,
                               ml_method=ml_method,
                               ml_method_name=config.ml_method_name,
                               ml_params={},
                               preproc_sequence=preprocessing_sequence,
                               preproc_sequence_name=config.preprocessing_sequence_name)
        return hp_setting, label
Exemplo n.º 6
0
 def get_class(specs, valid_class_names, class_name_ending, class_path,
               location, key):
     """Resolve the class referred to by ``specs`` without instantiating it.

     The class name is obtained from ``ObjectParser.get_class_name``;
     ``class_name_ending`` is appended to it before the lookup under
     ``class_path``.
     """
     base_name = ObjectParser.get_class_name(specs, valid_class_names, class_name_ending, location, key)
     full_name = f"{base_name}{class_name_ending}"
     return ReflectionHandler.get_class_by_name(full_name, class_path)
Exemplo n.º 7
0
 def build_object(dataset=None, **params):
     """Create the dataset-type-specific encoder for MatchedSequencesEncoder.

     The encoder class name is looked up in
     ``MatchedSequencesEncoder.dataset_mapping`` by the class name of
     ``dataset``, resolved under "reference_encoding/" and instantiated with
     the prepared parameters.

     Args:
         dataset: object whose class name selects the encoder implementation.
         **params: raw parameters; they go through
             ``MatchedSequencesEncoder._prepare_parameters`` first.

     Returns:
         The instantiated encoder.

     Raises:
         ValueError: if the dataset's class name has no entry in
             ``dataset_mapping``, or if preparing the parameters or creating
             the encoder raises a ``ValueError``.
     """
     dataset_type = dataset.__class__.__name__
     try:
         prepared_parameters = MatchedSequencesEncoder._prepare_parameters(**params)
         # Route an unmapped dataset type through the ValueError handler below
         # instead of letting the lookup raise a bare KeyError.
         # NOTE(review): assumes dataset_mapping supports `in` (e.g. a dict).
         if dataset_type not in MatchedSequencesEncoder.dataset_mapping:
             raise ValueError(dataset_type)
         encoder_class = ReflectionHandler.get_class_by_name(MatchedSequencesEncoder.dataset_mapping[dataset_type],
                                                             "reference_encoding/")
         encoder = encoder_class(**prepared_parameters)
     except ValueError as err:
         raise ValueError("{} is not defined for dataset of type {}.".format(MatchedSequencesEncoder.__name__,
                                                                             dataset_type)) from err
     return encoder
Exemplo n.º 8
0
 def build_object(dataset=None, **params):
     """Create the dataset-type-specific encoder for Word2VecEncoder.

     The encoder class name is looked up in
     ``Word2VecEncoder.dataset_mapping`` by the class name of ``dataset``,
     resolved under "word2vec/" and instantiated with the prepared
     parameters.

     Args:
         dataset: object whose class name selects the encoder implementation.
         **params: raw parameters; they go through
             ``Word2VecEncoder._prepare_parameters`` first.

     Returns:
         The instantiated encoder.

     Raises:
         ValueError: if the dataset's class name has no entry in
             ``dataset_mapping``, or if preparing the parameters or creating
             the encoder raises a ``ValueError``.
     """
     dataset_type = dataset.__class__.__name__
     try:
         prepared_params = Word2VecEncoder._prepare_parameters(**params)
         # Route an unmapped dataset type through the ValueError handler below
         # instead of letting the lookup raise a bare KeyError.
         # NOTE(review): assumes dataset_mapping supports `in` (e.g. a dict).
         if dataset_type not in Word2VecEncoder.dataset_mapping:
             raise ValueError(dataset_type)
         encoder_class = ReflectionHandler.get_class_by_name(
             Word2VecEncoder.dataset_mapping[dataset_type], "word2vec/")
         encoder = encoder_class(**prepared_params)
     except ValueError as err:
         raise ValueError(
             "{} is not defined for dataset of type {}.".format(
                 Word2VecEncoder.__name__, dataset_type)) from err
     return encoder
Exemplo n.º 9
0
    def parse(self, key: str, instruction: dict, symbol_table: SymbolTable, path: str = None) -> SubsamplingInstruction:
        """Create a SubsamplingInstruction from its specification.

        Checks the instruction keys, the referenced dataset, the list of
        subsample sizes (integers checked against the bounds 1 and the
        dataset's example count) and the export formats.

        Args:
            key: name of the instruction; becomes the instruction's name.
            instruction: specification with "type", "dataset",
                "subsampled_dataset_sizes" and "dataset_export_formats".
            symbol_table: table the referenced dataset is read from.
            path: not used by this method.

        Returns:
            The instruction holding the dataset, the sizes and the exporter
            classes.
        """
        location = SubsamplingParser.__name__
        sizes_location = f'{key}/subsampled_dataset_sizes'
        formats_location = f"{key}/dataset_export_formats"

        valid_keys = ["type", "dataset", "subsampled_dataset_sizes", "dataset_export_formats"]
        ParameterValidator.assert_keys(instruction.keys(), valid_keys, location, key)

        dataset_keys = symbol_table.get_keys_by_type(SymbolType.DATASET)
        ParameterValidator.assert_in_valid_list(instruction['dataset'], dataset_keys, location, f'{key}/dataset')
        dataset = symbol_table.get(instruction['dataset'])

        sizes = instruction['subsampled_dataset_sizes']
        ParameterValidator.assert_type_and_value(sizes, list, location, sizes_location)
        ParameterValidator.assert_all_type_and_value(sizes, int, location, sizes_location, 1, dataset.get_example_count())

        valid_export_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataExporter, 'Exporter', "dataset_export/")
        export_formats = instruction['dataset_export_formats']
        ParameterValidator.assert_type_and_value(export_formats, list, location, formats_location)
        ParameterValidator.assert_all_in_valid_list(export_formats, valid_export_formats, location, formats_location)

        exporters = [ReflectionHandler.get_class_by_name(export_format + "Exporter", "dataset_export/")
                     for export_format in export_formats]

        return SubsamplingInstruction(dataset=dataset, subsampled_dataset_sizes=sizes,
                                      dataset_export_formats=exporters, name=key)
Exemplo n.º 10
0
    def parse_instruction(key: str, instruction: dict,
                          symbol_table: SymbolTable, path) -> tuple:
        """Parse one instruction and register the result in the symbol table.

        The instruction "type" selects the parser class ``<type>Parser``; the
        parsed instruction object is added to ``symbol_table`` under ``key``.

        Args:
            key: name of the instruction.
            instruction: specification; must contain the key "type".
            symbol_table: table that is passed to the parser and receives the
                parsed instruction object.
            path: forwarded to the parser unchanged.

        Returns:
            A ``(instruction, symbol_table)`` tuple; note the first element is
            the specification dict, not the parsed instruction object.
        """
        ParameterValidator.assert_keys_present(list(instruction.keys()), ["type"], InstructionParser.__name__, key)

        parser_suffix = "Parser"
        parser_class_names = ReflectionHandler.discover_classes_by_partial_name(parser_suffix, "dsl/instruction_parsers/")
        # Drop the trailing "Parser" to obtain the user-facing instruction types.
        valid_instructions = [class_name[:-len(parser_suffix)] for class_name in parser_class_names]

        instruction_type = instruction["type"]
        ParameterValidator.assert_in_valid_list(instruction_type, valid_instructions, "InstructionParser", "type")

        parser_cls = ReflectionHandler.get_class_by_name(f"{instruction_type}Parser", "instruction_parsers/")
        instruction_object = parser_cls().parse(key, instruction, symbol_table, path)

        symbol_table.add(key, SymbolType.INSTRUCTION, instruction_object)
        return instruction, symbol_table
Exemplo n.º 11
0
    def _parse_dataset(key: str, dataset_specs: dict, symbol_table: SymbolTable, result_path: str) -> SymbolTable:
        """Validate a dataset specification, import the dataset and register it.

        Args:
            key: name of the dataset; used as the dataset's name and as its
                key in the symbol table.
            dataset_specs: specification of the dataset; its "format" selects
                the import class ``<format>Import``.
            symbol_table: table the imported dataset is added to.
            result_path: forwarded to ``ImportParser._prepare_params``.

        Returns:
            The symbol table, with the dataset added under ``key``.

        Raises:
            AssertionError: if a parameter required for the given
                "is_repertoire"/"paired" combination is missing.
            KeyError: if the import raises a ``KeyError``; re-raised with an
                explanatory message and the original error as cause.
            Exception: if the import fails for any other reason; re-raised
                with an explanatory message and the original error as cause.
        """
        location = "ImportParser"

        ParameterValidator.assert_keys(list(dataset_specs.keys()), ImportParser.valid_keys, location, f"datasets:{key}", False)

        valid_formats = ReflectionHandler.all_nonabstract_subclass_basic_names(DataImport, "Import", "IO/dataset_import/")
        ParameterValidator.assert_in_valid_list(dataset_specs["format"], valid_formats, location, "format")

        import_cls = ReflectionHandler.get_class_by_name("{}Import".format(dataset_specs["format"]))
        params = ImportParser._prepare_params(dataset_specs, result_path, key)

        if "is_repertoire" in params:
            # NOTE(review): the truthiness checks below assume this call
            # rejects non-bool values -- confirm against ParameterValidator.
            ParameterValidator.assert_type_and_value(params["is_repertoire"], bool, location, "is_repertoire")

            if params["is_repertoire"]:
                # IReceptorImport is exempt from the metadata_file requirement.
                if import_cls != IReceptorImport:
                    assert "metadata_file" in params, f"{location}: Missing parameter: metadata_file under {key}/params/"
                    ParameterValidator.assert_type_and_value(params["metadata_file"], str, location, "metadata_file")
            else:
                assert "paired" in params, f"{location}: Missing parameter: paired under {key}/params/"
                ParameterValidator.assert_type_and_value(params["paired"], bool, location, "paired")

                if params["paired"]:
                    assert "receptor_chains" in params, f"{location}: Missing parameter: receptor_chains under {key}/params/"
                    ParameterValidator.assert_in_valid_list(params["receptor_chains"], ["_".join(cp.value) for cp in ChainPair], location, "receptor_chains")

        try:
            dataset = import_cls.import_dataset(params, key)
            dataset.name = key
            symbol_table.add(key, SymbolType.DATASET, dataset)
        except KeyError as key_error:
            raise KeyError(f"{key_error}\n\nAn error occurred during parsing of dataset {key}. "
                           f"The keyword {key_error.args[0]} was missing. This either means this argument was "
                           f"not defined under definitions/datasets/{key}/params, or this column was missing from "
                           f"an input data file. ") from key_error
        except Exception as ex:
            raise Exception(f"{ex}\n\nAn error occurred while parsing the dataset {key}. See the log above for more details.") from ex

        return symbol_table
Exemplo n.º 12
0
def run_immuneML(namespace: argparse.Namespace):
    """Run an analysis described by parsed command-line arguments.

    Refuses to run when the result directory already exists and is not empty.
    Otherwise the directory is built, logging is directed to ``log.txt``
    inside it, warnings are redirected to the log, and the application is
    run: ``ImmuneMLApp`` when no tool is given, otherwise the class named by
    ``namespace.tool`` looked up under "api/".

    Args:
        namespace: parsed arguments providing ``result_path``,
            ``specification_path`` and ``tool``.

    Raises:
        ValueError: if ``namespace.result_path`` is a non-empty directory.
    """
    result_path = namespace.result_path

    if os.path.isdir(result_path) and os.listdir(result_path):
        raise ValueError(f"Directory {result_path} already exists. Please specify a new output directory for the analysis.")
    PathBuilder.build(result_path)

    logging.basicConfig(filename=result_path + "/log.txt",
                        level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s')

    def _log_warning(message, category, filename, lineno, file=None, line=None):
        # Only the message itself is forwarded to the log.
        return logging.warning(message)

    warnings.showwarning = _log_warning

    if namespace.tool is not None:
        app_cls = ReflectionHandler.get_class_by_name(namespace.tool, "api/")
        app = app_cls(**vars(namespace))
    else:
        app = ImmuneMLApp(namespace.specification_path, result_path)

    app.run()
Exemplo n.º 13
0
    def prepare_reference(reference_params: dict, location: str, paired: bool):
        """Import the reference receptors/sequences described by ``reference_params``.

        Args:
            reference_params: dict with the keys "format" and "params"; the
                params must contain "path" pointing to an existing file.
                The "params" dict is updated in place with "paired" if that
                key is missing.
            location: name of the caller, used in error messages.
            paired: expected value of the "paired" import parameter.

        Returns:
            The imported items: from ``IRISSequenceImport.import_items`` for
            the "IRIS" format, otherwise from ``ImportHelper.import_items``
            with the class ``<format>Import``.

        Raises:
            AssertionError: if the file does not exist or "paired" in the
                params disagrees with ``paired``.
        """
        ParameterValidator.assert_keys(list(reference_params.keys()), ["format", "params"], location, "reference")

        seq_import_params = reference_params.get("params", {})

        assert os.path.isfile(seq_import_params["path"]), (
            f"{location}: the file {seq_import_params['path']} does not exist. "
            f"Specify the correct path under reference.")

        if "paired" not in seq_import_params:
            seq_import_params["paired"] = paired
        else:
            assert seq_import_params["paired"] == paired, f"{location}: paired must be {paired} for SequenceImport"

        format_str = reference_params["format"]

        # todo refactor this when refactoring IRISSequenceImport
        if format_str == "IRIS":
            return IRISSequenceImport.import_items(**seq_import_params)

        import_class = ReflectionHandler.get_class_by_name(f"{format_str}Import")
        params = DefaultParamsLoader.load(EnvironmentSettings.default_params_path + "datasets/",
                                          DefaultParamsLoader.convert_to_snake_case(format_str))
        # User-provided import params override the loaded defaults.
        for param_name, param_value in seq_import_params.items():
            params[param_name] = param_value
        params["paired"] = paired

        processed_params = DatasetImportParams.build_object(**params)

        return ImportHelper.import_items(import_class, reference_params["params"]["path"], processed_params)
Exemplo n.º 14
0
    def _get_implanting_strategy(key: str, signal: dict) -> SignalImplantingStrategy:
        """Create the implanting strategy object requested by a signal.

        ``signal["implanting"]`` selects the class ``<implanting>Implanting``.
        The signal is merged with the defaults loaded for that class
        (signal values win) before the required keys are checked.

        Args:
            key: name of the signal, used in validation messages.
            signal: signal specification; "implanting_computation" is
                optional and, when given, is matched case-insensitively
                against the ``ImplantingComputation`` member names.

        Returns:
            The strategy instance, constructed with a ``GappedMotifImplanting``
            object, the sequence position weights and the implanting
            computation (``None`` if not specified).
        """
        suffix = "Implanting"
        strategy_class_names = ReflectionHandler.discover_classes_by_partial_name(suffix, "simulation/signal_implanting_strategy/")
        # Drop the trailing "Implanting" to obtain the user-facing strategy names.
        valid_strategies = [class_name[:-len(suffix)] for class_name in strategy_class_names]
        ParameterValidator.assert_in_valid_list(signal["implanting"], valid_strategies, "SignalParser", key)

        defaults = DefaultParamsLoader.load("signal_implanting_strategy/", f"{signal['implanting']}Implanting")
        signal = {**defaults, **signal}

        ParameterValidator.assert_keys_present(list(signal.keys()), ["motifs", "implanting", "sequence_position_weights"],
                                               SignalParser.__name__, key)

        if 'implanting_computation' in signal:
            computation_name = signal['implanting_computation'].lower()
            valid_computations = [el.name.lower() for el in ImplantingComputation]
            ParameterValidator.assert_in_valid_list(computation_name, valid_computations, SignalParser.__name__,
                                                    'implanting_computation')
            implanting_comp = ImplantingComputation[computation_name.upper()]
        else:
            implanting_comp = None

        strategy_cls = ReflectionHandler.get_class_by_name(f"{signal['implanting']}Implanting")
        return strategy_cls(GappedMotifImplanting(), signal["sequence_position_weights"], implanting_comp)
Exemplo n.º 15
0
 def import_encoder(config: MLMethodConfiguration, config_dir: str):
     """Load the encoder referred to by an ML method configuration.

     The class named by ``config.encoding_class`` is looked up and its
     ``load_encoder`` is called with ``config_dir`` directly followed by
     ``config.encoding_file`` (plain string concatenation).
     """
     encoder_cls = ReflectionHandler.get_class_by_name(config.encoding_class)
     encoder_file = config_dir + config.encoding_file
     return encoder_cls.load_encoder(encoder_file)
Exemplo n.º 16
0
 def parse_encoder_internal(short_class_name: str, encoder_params: dict):
     """Resolve an encoder class and collect its full parameter set.

     Args:
         short_class_name: encoder name without the "Encoder" suffix.
         encoder_params: parameters specified for the encoder.

     Returns:
         A 3-tuple ``(encoder_class, params, params)``; the second and third
         elements are the same object returned by
         ``ObjectParser.get_all_params``.
     """
     class_path = "encodings"
     encoder_class = ReflectionHandler.get_class_by_name(f"{short_class_name}Encoder", class_path)
     encoder_specs = {short_class_name: encoder_params}
     all_params = ObjectParser.get_all_params(encoder_specs, class_path, short_class_name)
     return encoder_class, all_params, all_params
Exemplo n.º 17
0
 def make_report_builder(self):
     """Return the class named ``<format>Builder`` found under "presentation/".

     The format is read from ``self.output['format']``; the class itself is
     returned, not an instance.
     """
     builder_name = f"{self.output['format']}Builder"
     return ReflectionHandler.get_class_by_name(builder_name, "presentation/")
Exemplo n.º 18
0
 def _prepare_sequence_encoder(self, params: EncoderParams):
     """Return the class named by ``self.sequence_encoding.value``.

     The class is looked up under "encodings" and returned without being
     instantiated. ``params`` is not used by this method.
     """
     return ReflectionHandler.get_class_by_name(self.sequence_encoding.value, "encodings")
Exemplo n.º 19
0
 def test_get_class_by_name(self):
     """Looking up "KmerHelper" under "util" must yield the KmerHelper class."""
     resolved_class = ReflectionHandler.get_class_by_name("KmerHelper", "util")
     self.assertEqual(KmerHelper, resolved_class)
Exemplo n.º 20
0
    def parse(self,
              key: str,
              instruction: dict,
              symbol_table: SymbolTable,
              path: str = None) -> TrainMLModelInstruction:
        """Create a TrainMLModelInstruction from its specification.

        The type of "settings" is checked first, then the set of instruction
        keys, then the types of the remaining simple fields. Afterwards the
        individual parts (settings, split configs, labels, strategy, metrics,
        path, context, reports) are parsed and assembled.

        Args:
            key: name of the instruction; becomes the instruction's name.
            instruction: specification of the instruction.
            symbol_table: table the dataset and other referenced objects are
                read from.
            path: ignored on input; it is replaced by the result of
                ``self._prepare_path(instruction)``.

        Returns:
            The assembled TrainMLModelInstruction.
        """
        location = TrainMLModelParser.__name__

        valid_keys = [
            "assessment", "selection", "dataset", "strategy", "labels",
            "metrics", "settings", "number_of_processes", "type", "reports",
            "optimization_metric", 'refit_optimal_model', 'store_encoded_data'
        ]
        ParameterValidator.assert_type_and_value(instruction['settings'], list, location, 'settings')
        ParameterValidator.assert_keys(list(instruction.keys()), valid_keys, location, "TrainMLModel")

        # Checked in this exact order so the first reported problem stays the same.
        expected_field_types = (
            ('refit_optimal_model', bool),
            ('metrics', list),
            ('optimization_metric', str),
            ('number_of_processes', int),
            ('strategy', str),
            ('store_encoded_data', bool),
        )
        for field_name, expected_type in expected_field_types:
            ParameterValidator.assert_type_and_value(instruction[field_name], expected_type, location, field_name)

        settings = self._parse_settings(instruction, symbol_table)
        dataset = symbol_table.get(instruction["dataset"])
        settings_count = len(settings)
        assessment = self._parse_split_config(key, instruction, "assessment", symbol_table, settings_count)
        selection = self._parse_split_config(key, instruction, "selection", symbol_table, settings_count)
        assessment, selection = self._update_split_configs(assessment, selection, dataset)
        label_config = self._create_label_config(instruction, dataset, key)
        strategy_cls = ReflectionHandler.get_class_by_name(instruction["strategy"], "hyperparameter_optimization/")
        metrics = {Metric[metric_name.upper()] for metric_name in instruction["metrics"]}
        optimization_metric = Metric[instruction["optimization_metric"].upper()]
        metric_search_criterion = Metric.get_search_criterion(optimization_metric)
        path = self._prepare_path(instruction)
        context = self._prepare_context(instruction, symbol_table)
        reports = self._prepare_reports(instruction["reports"], symbol_table)

        hp_strategy = strategy_cls(settings, metric_search_criterion)

        return TrainMLModelInstruction(dataset=dataset,
                                       hp_strategy=hp_strategy,
                                       hp_settings=settings,
                                       assessment=assessment,
                                       selection=selection,
                                       metrics=metrics,
                                       optimization_metric=optimization_metric,
                                       refit_optimal_model=instruction['refit_optimal_model'],
                                       label_configuration=label_config,
                                       path=path,
                                       context=context,
                                       store_encoded_data=instruction['store_encoded_data'],
                                       number_of_processes=instruction["number_of_processes"],
                                       reports=reports,
                                       name=key)