示例#1
0
    def _prepare_fit_pipeline(self, run_language):
        """Prepare the fit pipeline for ``run_language``.

        Steps, in order:

        1. If no negative class label is set on the options, both class labels
           are taken from ``possibly_intuit_order``.
        2. The target type is set to anomaly when the run is unsupervised, to
           binary when a negative class label is known, and to regression
           otherwise.
        3. The pipeline template registered for ``(self.run_mode, run_language)``
           is rendered with the fit options.

        Returns the result of ``CMRunnerUtils.render_file`` for that template.
        """
        if self.options.negative_class_label is None:
            intuited_labels = possibly_intuit_order(
                self.options.input,
                self.options.target_csv,
                self.options.target,
                self.options.unsupervised,
            )
            (
                self.options.positive_class_label,
                self.options.negative_class_label,
            ) = intuited_labels

        if self.options.unsupervised:
            detected_type = TargetType.ANOMALY
        elif self.options.negative_class_label is not None:
            detected_type = TargetType.BINARY
        else:
            detected_type = TargetType.REGRESSION
        self._set_target_type(detected_type)

        opts = self.options
        # The template to render is keyed by both the run mode and the language.
        template_name = self._functional_pipelines[self.run_mode, run_language]
        template_path = CMRunnerUtils.get_pipeline_filepath(template_name)

        def quoted(value):
            # Wrap the value in double quotes for substitution into the template.
            return '"{}"'.format(value)

        # Optional values are substituted either double-quoted or as the
        # literal text "null". Class labels are tested against None, so falsy
        # labels are still quoted; the other optional fields use truthiness.
        substitutions = {
            "customModelPath": os.path.abspath(opts.code_dir),
            "input_filename": opts.input,
            "weights": quoted(opts.row_weights) if opts.row_weights else "null",
            "weights_filename": (
                quoted(opts.row_weights_csv) if opts.row_weights_csv else "null"
            ),
            "target_column": quoted(opts.target) if opts.target else "null",
            "target_filename": (
                quoted(opts.target_csv) if opts.target_csv else "null"
            ),
            "positiveClassLabel": (
                quoted(opts.positive_class_label)
                if opts.positive_class_label is not None
                else "null"
            ),
            "negativeClassLabel": (
                quoted(opts.negative_class_label)
                if opts.negative_class_label is not None
                else "null"
            ),
            "output_dir": opts.output,
            "num_rows": opts.num_rows,
        }

        return CMRunnerUtils.render_file(template_path, substitutions)
示例#2
0
    def _prepare_prediction_server_or_batch_pipeline(self, run_language):
        """Prepare the pipeline for batch scoring or for the prediction server.

        The predictor template registered for ``(self.run_mode, run_language)``
        is rendered first. Outside server mode that rendered pipeline is
        returned as is. In server mode it is embedded as the ``pipeline``
        argument of the external server runner pipeline, together with the
        components repo, host, port and the ``threaded`` / ``show_perf``
        options, and the runner pipeline is returned serialized as JSON.
        """
        opts = self.options

        def quoted_or_null(value):
            # Falsy values are substituted as the literal text "null".
            return '"{}"'.format(value) if value else "null"

        # The predictor template differs between batch and server predictions.
        template_name = self._functional_pipelines[self.run_mode, run_language]
        template_path = CMRunnerUtils.get_pipeline_filepath(template_name)

        substitutions = {
            "positiveClassLabel": quoted_or_null(opts.positive_class_label),
            "negativeClassLabel": quoted_or_null(opts.negative_class_label),
            "customModelPath": os.path.abspath(opts.code_dir),
        }
        if self.run_mode == RunMode.SCORE:
            # Only the scoring template takes input/output file names.
            substitutions["input_filename"] = opts.input
            substitutions["output_filename"] = quoted_or_null(opts.output)

        predictor_pipeline = CMRunnerUtils.render_file(template_path, substitutions)
        if self.run_mode != RunMode.SERVER:
            return predictor_pipeline

        runner_path = CMRunnerUtils.get_pipeline_filepath(EXTERNAL_SERVER_RUNNER)
        with open(runner_path, "r") as runner_file:
            runner_pipeline = json.load(runner_file)

        # The runner pipeline is edited as parsed JSON rather than rendered as
        # a template, because quotes inside the embedded predictor pipeline
        # would not be escaped by template substitution.
        runner_args = runner_pipeline["pipe"][0]["arguments"]
        runner_args["pipeline"] = predictor_pipeline
        runner_args["repo"] = CMRunnerUtils.get_components_repo()

        # The address is "host" or "host:port"; without a colon the port is None.
        host, colon, port_text = opts.address.partition(":")
        runner_args["host"] = host
        runner_args["port"] = int(port_text) if colon else None
        runner_args["threaded"] = opts.threaded
        runner_args["show_perf"] = opts.show_perf

        return json.dumps(runner_pipeline)
示例#3
0
    def _prepare_prediction_server_or_batch_pipeline(self, run_language):
        """Render the prediction-server or predictor pipeline template.

        ``PREDICTION_SERVER_PIPELINE`` is used when the run mode is
        ``RunMode.SERVER`` and ``PREDICTOR_PIPELINE`` otherwise. In score mode
        the input/output file names are substituted; in every other mode the
        host, port, ``threaded`` and ``show_perf`` settings are substituted.

        Returns the result of ``CMRunnerUtils.render_file`` for the template.
        """
        opts = self.options

        if self.run_mode == RunMode.SERVER:
            template_name = PREDICTION_SERVER_PIPELINE
        else:
            template_name = PREDICTOR_PIPELINE
        template_path = CMRunnerUtils.get_pipeline_filepath(template_name)

        def quoted_or_null(value):
            # Falsy values are substituted as the literal text "null".
            return '"{}"'.format(value) if value else "null"

        # Values substituted into the pipeline template.
        substitutions = {
            "positiveClassLabel": quoted_or_null(opts.positive_class_label),
            "negativeClassLabel": quoted_or_null(opts.negative_class_label),
            "customModelPath": os.path.abspath(opts.code_dir),
            "run_language": run_language.value,
        }

        if self.run_mode == RunMode.SCORE:
            substitutions["input_filename"] = opts.input
            substitutions["output_filename"] = quoted_or_null(opts.output)
        else:
            # The address is "host" or "host:port"; without a colon the port is None.
            host, colon, port_text = opts.address.partition(":")
            substitutions["host"] = host
            substitutions["port"] = int(port_text) if colon else None
            # Booleans are substituted in lower case ("true" / "false").
            substitutions["threaded"] = str(opts.threaded).lower()
            substitutions["show_perf"] = str(opts.show_perf).lower()

        return CMRunnerUtils.render_file(template_path, substitutions)
示例#4
0
    def _prepare_prediction_server_or_batch_pipeline(self, run_language):
        """Render the pipeline used for scoring or for serving predictions.

        ``SERVER_PIPELINE`` is rendered when ``self.run_mode`` is
        ``RunMode.SERVER`` and ``PREDICTOR_PIPELINE`` otherwise. In score mode
        the input/output file names are substituted; in every other mode the
        host, port, ``show_perf``, engine type, component type and uwsgi worker
        count are substituted.

        For a production server, the rendered pipeline is additionally parsed
        as JSON and given a system-config section when it has none.

        Args:
            run_language: object whose ``value`` is substituted as
                ``run_language`` in the template.

        Returns:
            The rendered pipeline; for a production server, the re-serialized
            JSON text of the pipeline.

        Raises:
            ValueError: if the port part of ``options.address`` is not an integer.
        """
        options = self.options
        functional_pipeline_name = (SERVER_PIPELINE if self.run_mode
                                    == RunMode.SERVER else PREDICTOR_PIPELINE)
        functional_pipeline_filepath = CMRunnerUtils.get_pipeline_filepath(
            functional_pipeline_name)

        # fields to replace in the pipeline; falsy class labels are rendered
        # as the literal text "null"
        replace_data = {
            "positiveClassLabel":
            '"{}"'.format(options.positive_class_label)
            if options.positive_class_label else "null",
            "negativeClassLabel":
            '"{}"'.format(options.negative_class_label)
            if options.negative_class_label else "null",
            "customModelPath":
            os.path.abspath(options.code_dir),
            "run_language":
            run_language.value,
            "monitor":
            options.monitor,
            "model_id":
            options.model_id,
            "deployment_id":
            options.deployment_id,
            "monitor_settings":
            options.monitor_settings,
        }

        if self.run_mode == RunMode.SCORE:
            replace_data.update({
                "input_filename":
                options.input,
                "output_filename":
                '"{}"'.format(options.output) if options.output else "null",
            })
        else:
            # The address is "host" or "host:port"; without a port it is None.
            host_port_list = options.address.split(":", 1)
            host = host_port_list[0]
            port = int(host_port_list[1]) if len(host_port_list) == 2 else None
            # Default to None so that options without a ``max_workers``
            # attribute render "null" instead of raising AttributeError.
            max_workers = getattr(options, "max_workers", None)
            replace_data.update({
                "host":
                host,
                "port":
                port,
                "show_perf":
                str(options.show_perf).lower(),
                "engine_type":
                "RestModelServing" if options.production else "Generic",
                "component_type":
                "uwsgi_serving" if options.production else "prediction_server",
                "uwsgi_max_workers":
                max_workers if max_workers else "null",
            })

        functional_pipeline_str = CMRunnerUtils.render_file(
            functional_pipeline_filepath, replace_data)

        if self.run_mode == RunMode.SERVER and options.production:
            pipeline_json = json.loads(functional_pipeline_str)
            # Because of tech debt in MLPiper which requires that the modelFileSourcePath key
            # be filled with something, we're putting in a dummy file path here.
            # The section is only added when missing; an existing system
            # config is left untouched (previously this path raised
            # UnboundLocalError because the dummy config was never bound).
            if json_fields.PIPELINE_SYSTEM_CONFIG_FIELD not in pipeline_json:
                pipeline_json[json_fields.PIPELINE_SYSTEM_CONFIG_FIELD] = {
                    "modelFileSourcePath": os.path.abspath(__file__)
                }
            functional_pipeline_str = json.dumps(pipeline_json)
        return functional_pipeline_str
示例#5
0
    def _prepare_fit_pipeline(self, run_language):
        """Resolve class labels for classification targets and render the fit pipeline.

        For classification target types, class labels are inferred with
        ``possibly_intuit_order`` and stored on ``self.options``: as the
        positive/negative pair for binary targets, or as ``class_labels`` for
        multiclass targets. The pipeline template registered for
        ``(self.run_mode, run_language)`` is then rendered with the fit options.

        Returns:
            The result of ``CMRunnerUtils.render_file`` for the template.

        Raises:
            DrumCommonException: if labels cannot be inferred, if a binary
                target does not yield exactly 2 labels, or if a multiclass
                target yields fewer than 2 labels.
        """
        # NOTE(review): because of the ``or``, this branch is also entered when
        # only one of the two label options is unset (e.g. binary labels were
        # supplied but ``class_labels`` is None), in which case the supplied
        # binary labels are overwritten by the inferred ones -- confirm that
        # this is intended.
        if self.target_type.value in TargetType.CLASSIFICATION.value and (
                self.options.negative_class_label is None
                or self.options.class_labels is None):
            # Class label information may be missing, but we may be able to
            # infer the labels from the target.
            possible_class_labels = possibly_intuit_order(
                self.options.input,
                self.options.target_csv,
                self.options.target,
                self.target_type == TargetType.ANOMALY,
            )
            if possible_class_labels is None:
                raise DrumCommonException(
                    "Target type {} requires class label information. No labels were supplied and "
                    "labels could not be inferred from the target.".format(
                        self.target_type.value))

            if self.target_type == TargetType.BINARY:
                if len(possible_class_labels) != 2:
                    raise DrumCommonException(
                        "Target type {} requires exactly 2 class labels. Detected {}: {}"
                        .format(TargetType.BINARY,
                                len(possible_class_labels),
                                possible_class_labels))
                (
                    self.options.positive_class_label,
                    self.options.negative_class_label,
                ) = possible_class_labels
            elif self.target_type == TargetType.MULTICLASS:
                if len(possible_class_labels) < 2:
                    # The message states the rule that is actually enforced:
                    # exactly 2 labels pass this check.
                    raise DrumCommonException(
                        "Target type {} requires at least 2 class labels. Detected {}: {}"
                        .format(
                            TargetType.MULTICLASS,
                            len(possible_class_labels),
                            possible_class_labels,
                        ))
                self.options.class_labels = list(possible_class_labels)

        options = self.options
        # functional pipeline is predictor pipeline
        # they are a little different for batch and server predictions.
        functional_pipeline_name = self._functional_pipelines[(self.run_mode,
                                                               run_language)]
        functional_pipeline_filepath = CMRunnerUtils.get_pipeline_filepath(
            functional_pipeline_name)
        # fields to replace in the functional pipeline (predictor); values are
        # passed to the renderer as they are, without quoting
        replace_data = {
            "customModelPath": os.path.abspath(options.code_dir),
            "input_filename": options.input,
            "weights": options.row_weights,
            "weights_filename": options.row_weights_csv,
            "target_column": options.target,
            "target_filename": options.target_csv,
            "positiveClassLabel": options.positive_class_label,
            "negativeClassLabel": options.negative_class_label,
            "classLabels": options.class_labels,
            "output_dir": options.output,
            "num_rows": options.num_rows,
            "sparse_column_file": options.sparse_column_file,
        }

        functional_pipeline_str = CMRunnerUtils.render_file(
            functional_pipeline_filepath, replace_data)
        return functional_pipeline_str