Exemplo n.º 1
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                if "preprocessing" in setting and setting[
                        "preprocessing"] is not None:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                        if not all(preproc.keeps_example_count()
                                   for preproc in preprocessing_sequence):
                            raise ValueError(
                                f"{TrainMLModelParser.__name__}: preprocessing sequence {preproc_name} includes preprocessing that "
                                f"change the number of examples at runtime and as such cannot be used with this instruction. See the "
                                f"documentation for the preprocessing or alternatively use them with other instructions."
                            )
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                ml_method = symbol_table.get(setting["ml_method"])
                ml_method.check_encoder_compatibility(encoder)

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=ml_method,
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            )
Exemplo n.º 2
0
    def _parse_settings(self, instruction: dict,
                        symbol_table: SymbolTable) -> list:
        try:
            settings = []
            for index, setting in enumerate(instruction["settings"]):
                if "preprocessing" in setting:
                    ParameterValidator.assert_type_and_value(
                        setting["preprocessing"], str,
                        TrainMLModelParser.__name__, f'settings: {index+1}. '
                        f'element: preprocessing')
                    if symbol_table.contains(setting["preprocessing"]):
                        preprocessing_sequence = symbol_table.get(
                            setting["preprocessing"])
                        preproc_name = setting["preprocessing"]
                    else:
                        raise KeyError(
                            f"{TrainMLModelParser.__name__}: preprocessing was set in the TrainMLModel instruction to value "
                            f"{setting['preprocessing']}, but no such preprocessing was defined in the specification under "
                            f"definitions: {PreprocessingParser.keyword}.")
                else:
                    setting["preprocessing"] = None
                    preprocessing_sequence = []
                    preproc_name = None

                ParameterValidator.assert_keys(
                    setting.keys(), ["preprocessing", "ml_method", "encoding"],
                    TrainMLModelParser.__name__,
                    f"settings, {index + 1}. entry")

                encoder = symbol_table.get(setting["encoding"]).build_object(symbol_table.get(instruction["dataset"]),
                                                                             **symbol_table.get_config(setting["encoding"])["encoder_params"])\
                    .set_context({"dataset": symbol_table.get(instruction['dataset'])})

                s = HPSetting(encoder=encoder,
                              encoder_name=setting["encoding"],
                              encoder_params=symbol_table.get_config(
                                  setting["encoding"])["encoder_params"],
                              ml_method=symbol_table.get(setting["ml_method"]),
                              ml_method_name=setting["ml_method"],
                              ml_params=symbol_table.get_config(
                                  setting["ml_method"]),
                              preproc_sequence=preprocessing_sequence,
                              preproc_sequence_name=preproc_name)
                settings.append(s)
            return settings
        except KeyError as key_error:
            raise KeyError(
                f"{TrainMLModelParser.__name__}: parameter {key_error.args[0]} was not defined under settings in TrainMLModel instruction."
            )
Exemplo n.º 3
0
    def _prepare_optional_params(self, analysis: dict,
                                 symbol_table: SymbolTable,
                                 yaml_location: str) -> dict:

        params = {}
        dataset = symbol_table.get(analysis["dataset"])

        if "encoding" in analysis:
            params["encoder"] = symbol_table.get(
                analysis["encoding"]).build_object(
                    dataset,
                    **symbol_table.get_config(
                        analysis["encoding"])["encoder_params"])

            if "labels" in analysis:
                params["label_config"] = LabelHelper.create_label_config(
                    analysis["labels"], dataset,
                    ExploratoryAnalysisParser.__name__, yaml_location)
            else:
                params["label_config"] = LabelConfiguration()

        if "preprocessing_sequence" in analysis:
            params["preprocessing_sequence"] = symbol_table.get(
                analysis["preprocessing_sequence"])

        return params
    def _prepare_optional_params(self, analysis: dict, symbol_table: SymbolTable) -> dict:

        params = {}
        dataset = symbol_table.get(analysis["dataset"])


        if "encoding" in analysis:
            params["encoder"] = symbol_table.get(analysis["encoding"]).build_object(dataset, **symbol_table.get_config(analysis["encoding"])["encoder_params"])
            params["label_config"] = LabelConfiguration()

            if "labels" in analysis:
                for label in analysis["labels"]:
                    label_values = self._get_label_values(label, dataset)
                    params["label_config"].add_label(label, label_values)

        if "preprocessing_sequence" in analysis:
            params["preprocessing_sequence"] = symbol_table.get(analysis["preprocessing_sequence"])

        if "number_of_processes" in analysis:
            params["number_of_processes"] = analysis["number_of_processes"]

        return params