def configure(self, params):
    """Set up shared-memory I/O files and build the mlpiper pipeline executor.

    Renders the configured pipeline template with the shared-memory file
    names, normalizes it via ``_fix_pipeline``, then constructs and
    initializes an mlpiper ``Executor`` for it.
    """
    super(ExternalRunner, self).configure(params)

    # Default to pickle-based shared memory when no format was configured.
    fmt = self._params.get("shmem_format")
    self._shmem_format = ShmemFormat.PICKLE if fmt is None else fmt

    self._in_mmap_file = ShmemUtils.create_in_mem_file(self._shmem_format)
    self._out_mmap_file = ShmemUtils.create_out_mem_file(self._shmem_format)

    # NOTE(review): the output filename is wrapped in literal double quotes
    # while the input filename is not — presumably the pipeline template
    # expects that asymmetry; confirm against the template's placeholders.
    render_context = {
        "input_filename": self._in_mmap_file.name,
        "output_filename": '"{}"'.format(self._out_mmap_file.name),
    }

    pipeline = Template(self._params.get("pipeline")).render(render_context)
    pipeline = self._fix_pipeline(pipeline)

    executor_config = ExecutorConfig(
        pipeline=pipeline,
        pipeline_file=None,
        run_locally=True,
        comp_root_path=self._params.get("repo"),
        mlpiper_jar=os.path.join(
            MLPiperRunner.SCRIPT_DIR, "..", "jars", "mlpiper.jar"),
        spark_jars=None,
    )
    self._pipeline_executor = Executor(executor_config)
    self._pipeline_executor.init_pipeline()
def _run_fit_and_predictions_pipelines_in_mlpiper(self):
    """Build the infra pipeline for the current run mode and execute it.

    Dispatches on ``self.run_mode`` (SERVER / SCORE / FIT) to prepare an
    mlpiper pipeline string, runs it through an mlpiper ``Executor`` while
    collecting timing stats, and — in SCORE mode without an explicit output
    file — prints the predictions read back from a temporary CSV.

    Raises:
        DrumCommonException: if ``self.run_mode`` is none of the supported
            modes.
    """
    if self.run_mode == RunMode.SERVER:
        run_language = self._check_artifacts_and_get_run_language()
        # in prediction server mode infra pipeline == prediction server runner pipeline
        infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
            run_language)
    elif self.run_mode == RunMode.SCORE:
        run_language = self._check_artifacts_and_get_run_language()
        # Stays None when the caller supplied an output path; the final
        # print-back below only fires when it is set.
        tmp_output_filename = None
        # if output is not provided, output into tmp file and print
        if not self.options.output:
            # keep object reference so it will be destroyed only in the end of the process
            # NOTE(review): __tmp_output_file is a method local, so the
            # NamedTemporaryFile is actually deleted when this method
            # returns, not at process exit. That is still sufficient here
            # because the read_csv at the bottom runs within this scope.
            __tmp_output_file = tempfile.NamedTemporaryFile(mode="w")
            self.options.output = tmp_output_filename = __tmp_output_file.name
        # in batch prediction mode infra pipeline == predictor pipeline
        infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(
            run_language)
    elif self.run_mode == RunMode.FIT:
        run_language = self._get_fit_run_language()
        infra_pipeline_str = self._prepare_fit_pipeline(run_language)
    else:
        error_message = "{} mode is not supported here".format(
            self.run_mode)
        print(error_message)
        raise DrumCommonException(error_message)

    # Local (non-Spark) execution of the prepared pipeline string.
    config = ExecutorConfig(
        pipeline=infra_pipeline_str,
        pipeline_file=None,
        run_locally=True,
        comp_root_path=CMRunnerUtils.get_components_repo(),
        mlpiper_jar=None,
        spark_jars=None,
    )
    _pipeline_executor = Executor(config).standalone(True).set_verbose(
        self.options.verbose)
    # assign logger with the name drum.mlpiper.Executor to mlpiper Executor
    _pipeline_executor.set_logger(
        logging.getLogger(LOGGER_NAME_PREFIX + "."
                          + _pipeline_executor.logger_name()))
    self.logger.info(">>> Start {} in the {} mode".format(
        ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))

    # Perf stats are collected only when --show-perf was requested and we
    # are not in server mode.
    sc = StatsCollector(disable_instance=(
        not hasattr(self.options, "show_perf")
        or not self.options.show_perf
        or self.run_mode == RunMode.SERVER))
    # Each report is the delta between two marks set around the run below.
    sc.register_report("Full time", "end", StatsOperation.SUB, "start")
    sc.register_report("Init time (incl model loading)", "init",
                       StatsOperation.SUB, "start")
    sc.register_report("Run time (incl reading CSV)", "run",
                       StatsOperation.SUB, "init")
    with verbose_stdout(self.options.verbose):
        sc.enable()
        try:
            sc.mark("start")

            _pipeline_executor.init_pipeline()
            self.runtime.initialization_succeeded = True
            sc.mark("init")

            # cleanup is deferred to the finally block so it also runs on
            # failure.
            _pipeline_executor.run_pipeline(cleanup=False)
            sc.mark("run")
        finally:
            _pipeline_executor.cleanup_pipeline()
            sc.mark("end")
            sc.disable()
    self.logger.info("<<< Finish {} in the {} mode".format(
        ArgumentsOptions.MAIN_COMMAND, self.run_mode.value))
    sc.print_reports()
    if self.run_mode == RunMode.SCORE:
        # print result if output is not provided
        if tmp_output_filename:
            print(pd.read_csv(tmp_output_filename))