Example #1
    def configure(self, params):
        super(ExternalRunner, self).configure(params)

        # Default to pickle-based shared memory when no format is given.
        self._shmem_format = self._params.get("shmem_format")
        if self._shmem_format is None:
            self._shmem_format = ShmemFormat.PICKLE

        self._in_mmap_file = ShmemUtils.create_in_mem_file(self._shmem_format)
        self._out_mmap_file = ShmemUtils.create_out_mem_file(
            self._shmem_format)

        # Note: the output filename is pre-quoted so that it substitutes into
        # the pipeline template as a string literal.
        replace_data = {
            "input_filename": self._in_mmap_file.name,
            "output_filename": '"{}"'.format(self._out_mmap_file.name),
        }

        # Render the pipeline template with the shared-memory file names, then
        # normalize the resulting JSON.
        pipeline_str = self._params.get("pipeline")
        pipeline_str = Template(pipeline_str).render(replace_data)
        pipeline_str = self._fix_pipeline(pipeline_str)

        comp_repo = self._params.get("repo")

        config = ExecutorConfig(
            pipeline=pipeline_str,
            pipeline_file=None,
            run_locally=True,
            comp_root_path=comp_repo,
            mlpiper_jar=os.path.join(MLPiperRunner.SCRIPT_DIR, "..", "jars",
                                     "mlpiper.jar"),
            spark_jars=None,
        )

        self._pipeline_executor = Executor(config)
        self._pipeline_executor.init_pipeline()
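The "pipeline" parameter above is a template string rendered with replace_data. A minimal standalone sketch of that rendering step, assuming Template here is jinja2.Template and using a hypothetical two-component pipeline (note that output_filename arrives pre-quoted, while input_filename is substituted inside existing quotes):

from jinja2 import Template

# Hypothetical pipeline template with the two placeholders used above.
pipeline_template = """
{
    "name": "shmem_pipeline",
    "pipe": [
        {"name": "reader", "arguments": {"filename": "{{ input_filename }}"}},
        {
            "name": "writer",
            "arguments": {
                "filename": {{ output_filename }}
            }
        }
    ]
}
"""

rendered = Template(pipeline_template).render(
    input_filename="/dev/shm/in.mmap",      # substituted inside quotes
    output_filename='"/dev/shm/out.mmap"',  # pre-quoted, as in configure()
)
# `rendered` is now valid JSON that _fix_pipeline() can load and extend.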
Example #2
    def _run_fit_and_predictions_pipelines_in_mlpiper(self):
        if self.run_mode == RunMode.SERVER:
            run_language = self._check_artifacts_and_get_run_language()
            # in prediction-server mode, the infra pipeline is the prediction-server runner pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.SCORE:
            run_language = self._check_artifacts_and_get_run_language()
            tmp_output_filename = None
            # if no output path is provided, write into a tmp file and print it
            if not self.options.output:
                # keep the object reference so it is destroyed only at the end of the process
                __tmp_output_file = tempfile.NamedTemporaryFile(mode="w")
                self.options.output = tmp_output_filename = __tmp_output_file.name
            # in batch-prediction mode, the infra pipeline is the predictor pipeline
            infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
                run_language)
        elif self.run_mode == RunMode.FIT:
            run_language = self._get_fit_run_language()
            infra_pipeline_str = self._prepare_fit_pipeline(run_language)
        else:
            error_message = "{} mode is not supported here".format(
                self.run_mode)
            print(error_message)
            raise DrumCommonException(error_message)

        config = ExecutorConfig(
            pipeline=infra_pipeline_str,
            pipeline_file=None,
            run_locally=True,
            comp_root_path=CMRunnerUtils.get_components_repo(),
            mlpiper_jar=None,
            spark_jars=None,
        )

        _pipeline_executor = Executor(config).standalone(True).set_verbose(
            self.options.verbose)
        # assign a logger named drum.mlpiper.Executor to the mlpiper Executor
        _pipeline_executor.set_logger(
            logging.getLogger(LOGGER_NAME_PREFIX + "." +
                              _pipeline_executor.logger_name()))

        self.logger.info(">>> Start {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        show_perf = getattr(self.options, "show_perf", False)
        sc = StatsCollector(disable_instance=(
            not show_perf or self.run_mode == RunMode.SERVER))
        sc.register_report("Full time", "end", StatsOperation.SUB, "start")
        sc.register_report("Init time (incl model loading)", "init",
                           StatsOperation.SUB, "start")
        sc.register_report("Run time (incl reading CSV)", "run",
                           StatsOperation.SUB, "init")
        with verbose_stdout(self.options.verbose):
            sc.enable()
            try:
                sc.mark("start")

                _pipeline_executor.init_pipeline()
                self.runtime.initialization_succeeded = True
                sc.mark("init")

                _pipeline_executor.run_pipeline(cleanup=False)
                sc.mark("run")
            finally:
                _pipeline_executor.cleanup_pipeline()
                sc.mark("end")
                sc.disable()
        self.logger.info("<<< Finish {} in the {} mode".format(
            ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
        sc.print_reports()
        if self.run_mode == RunMode.SCORE:
            # print the result when no output path was provided
            if tmp_output_filename:
                print(pd.read_csv(tmp_output_filename))
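The StatsCollector usage above follows a mark-and-diff pattern: each register_report defines a report as the difference (StatsOperation.SUB) between two named marks. A minimal sketch of the same pattern, reusing only the calls that appear in this example (their signatures are assumed from the snippet above):

import time

sc = StatsCollector(disable_instance=False)
sc.register_report("Total time", "end", StatsOperation.SUB, "start")

sc.enable()
sc.mark("start")
time.sleep(0.1)      # stand-in for init_pipeline() and run_pipeline()
sc.mark("end")
sc.disable()

sc.print_reports()   # reports "Total time" as t(end) - t(start)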
Example #3
class ExternalRunner(ConnectableComponent):
    def __init__(self, engine):
        super(ExternalRunner, self).__init__(engine)
        self._pipeline_executor = None

        self._in_mmap_file = None
        self._out_mmap_file = None

        self._shmem_format = None

    def __del__(self):
        # The mmap files are only created in configure(); guard against a
        # never-configured instance being destroyed.
        if self._in_mmap_file is not None:
            self._in_mmap_file.close()
        if self._out_mmap_file is not None:
            self._out_mmap_file.close()

    def _fix_pipeline(self, pipeline_str):
        # Ensure the pipeline JSON carries a (possibly empty) system-config
        # section.
        pipeline_json = json.loads(pipeline_str)

        if json_fields.PIPELINE_SYSTEM_CONFIG_FIELD not in pipeline_json:
            pipeline_json[json_fields.PIPELINE_SYSTEM_CONFIG_FIELD] = {}
        return json.dumps(pipeline_json)

    def configure(self, params):
        super(ExternalRunner, self).configure(params)

        # Default to pickle-based shared memory when no format is given.
        self._shmem_format = self._params.get("shmem_format")
        if self._shmem_format is None:
            self._shmem_format = ShmemFormat.PICKLE

        self._in_mmap_file = ShmemUtils.create_in_mem_file(self._shmem_format)
        self._out_mmap_file = ShmemUtils.create_out_mem_file(
            self._shmem_format)

        # Note: the output filename is pre-quoted so that it substitutes into
        # the pipeline template as a string literal.
        replace_data = {
            "input_filename": self._in_mmap_file.name,
            "output_filename": '"{}"'.format(self._out_mmap_file.name),
        }

        # Render the pipeline template with the shared-memory file names, then
        # normalize the resulting JSON.
        pipeline_str = self._params.get("pipeline")
        pipeline_str = Template(pipeline_str).render(replace_data)
        pipeline_str = self._fix_pipeline(pipeline_str)

        comp_repo = self._params.get("repo")

        config = ExecutorConfig(
            pipeline=pipeline_str,
            pipeline_file=None,
            run_locally=True,
            comp_root_path=comp_repo,
            mlpiper_jar=os.path.join(MLPiperRunner.SCRIPT_DIR, "..", "jars",
                                     "mlpiper.jar"),
            spark_jars=None,
        )

        self._pipeline_executor = Executor(config)
        self._pipeline_executor.init_pipeline()

    def _get_out_df(self):
        return ShmemUtils.read_out_mem_file(self._shmem_format,
                                            self._out_mmap_file)

    def _clean_out_mem(self):
        pass

    def _set_in_df(self, in_df):
        ShmemUtils.write_in_mem_file(self._shmem_format, in_df,
                                     self._in_mmap_file)

    def _run_pipeline(self):
        self._pipeline_executor.run_pipeline(cleanup=False)

    def _cleanup_pipeline(self):
        self._pipeline_executor.cleanup_pipeline()

    def _materialize(self, parent_data_objs, user_data):
        self._pipeline_executor.go()
        return []
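Putting the class together, a hypothetical driver could look as follows. The engine object, the params values, and in_df are assumptions inferred from the calls above; in practice _materialize() is invoked by the surrounding mlpiper engine rather than called directly:

# Hypothetical usage sketch; `engine`, the params values, and `in_df`
# are assumptions, not values from the original code.
runner = ExternalRunner(engine)
runner.configure({
    "pipeline": pipeline_template_str,  # Jinja-style template (see Example #1)
    "repo": "/opt/mlpiper/components",  # hypothetical components repo path
    "shmem_format": None,               # falls back to ShmemFormat.PICKLE
})

runner._set_in_df(in_df)       # write the input DataFrame to shared memory
runner._run_pipeline()         # run the wrapped pipeline (no cleanup yet)
out_df = runner._get_out_df()  # read the result back from shared memory
runner._cleanup_pipeline()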