def _prepare_prediction_server_or_batch_pipeline(self, run_language):
    options = self.options
    # The functional pipeline is the predictor pipeline; it differs slightly
    # between batch and server predictions.
    functional_pipeline_name = self._functional_pipelines[(self.run_mode, run_language)]
    functional_pipeline_filepath = CMRunnerUtils.get_pipeline_filepath(functional_pipeline_name)
    # fields to replace in the functional (predictor) pipeline
    replace_data = {
        "positiveClassLabel": '"{}"'.format(options.positive_class_label)
        if options.positive_class_label
        else "null",
        "negativeClassLabel": '"{}"'.format(options.negative_class_label)
        if options.negative_class_label
        else "null",
        "customModelPath": os.path.abspath(options.code_dir),
    }
    if self.run_mode == RunMode.SCORE:
        replace_data.update(
            {
                "input_filename": options.input,
                "output_filename": '"{}"'.format(options.output) if options.output else "null",
            }
        )

    functional_pipeline_str = CMRunnerUtils.render_file(functional_pipeline_filepath, replace_data)

    ret_pipeline = functional_pipeline_str

    if self.run_mode == RunMode.SERVER:
        with open(CMRunnerUtils.get_pipeline_filepath(EXTERNAL_SERVER_RUNNER), "r") as f:
            runner_pipeline_json = json.load(f)
            # cannot use a template for the pipeline, as quotes would not be escaped
            args = runner_pipeline_json["pipe"][0]["arguments"]
            # in server mode, the predictor pipeline is passed to the server as a parameter
            args["pipeline"] = functional_pipeline_str
            args["repo"] = CMRunnerUtils.get_components_repo()
            # the address may be either "host" or "host:port"
            host_port_list = options.address.split(":", 1)
            args["host"] = host_port_list[0]
            args["port"] = int(host_port_list[1]) if len(host_port_list) == 2 else None
            args["threaded"] = options.threaded
            args["show_perf"] = options.show_perf
            ret_pipeline = json.dumps(runner_pipeline_json)
    return ret_pipeline
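# Illustrative only: a minimal sketch of the external runner pipeline JSON that
# the SERVER branch above rewrites in place. The key layout ("pipe"[0]["arguments"]
# and the argument names) follows the code; the concrete values shown here are
# assumptions for illustration, not the actual template contents.
#
#   {
#       "pipe": [
#           {
#               "arguments": {
#                   "pipeline": "<rendered functional pipeline JSON>",
#                   "repo": "<components repo path>",
#                   "host": "localhost",    # assumed example host
#                   "port": 6788,           # assumed example port
#                   "threaded": false,
#                   "show_perf": false
#               }
#           }
#       ]
#   }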
def _run_fit_and_predictions_pipelines_in_mlpiper(self):
    if self.run_mode == RunMode.SERVER:
        run_language = self._check_artifacts_and_get_run_language()
        # in prediction server mode, the infra pipeline == prediction server runner pipeline
        infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(run_language)
    elif self.run_mode == RunMode.SCORE:
        run_language = self._check_artifacts_and_get_run_language()
        tmp_output_filename = None
        # if no output file is provided, write to a tmp file and print it at the end
        if not self.options.output:
            # keep the object reference alive so the temp file is not deleted
            # before it is read back below
            __tmp_output_file = tempfile.NamedTemporaryFile(mode="w")
            self.options.output = tmp_output_filename = __tmp_output_file.name
        # in batch prediction mode, the infra pipeline == predictor pipeline
        infra_pipeline_str = self._prepare_prediction_server_or_batch_pipeline(run_language)
    elif self.run_mode == RunMode.FIT:
        run_language = self._get_fit_run_language()
        infra_pipeline_str = self._prepare_fit_pipeline(run_language)
    else:
        error_message = "{} mode is not supported here".format(self.run_mode)
        print(error_message)
        raise DrumCommonException(error_message)

    config = ExecutorConfig(
        pipeline=infra_pipeline_str,
        pipeline_file=None,
        run_locally=True,
        comp_root_path=CMRunnerUtils.get_components_repo(),
        mlpiper_jar=None,
        spark_jars=None,
    )

    _pipeline_executor = Executor(config).standalone(True).set_verbose(self.options.verbose)
    # assign a logger named drum.mlpiper.Executor to the mlpiper Executor
    _pipeline_executor.set_logger(
        logging.getLogger(LOGGER_NAME_PREFIX + "." + _pipeline_executor.logger_name())
    )

    self.logger.info(
        ">>> Start {} in the {} mode".format(ArgumentsOptions.MAIN_COMMAND, self.run_mode.value)
    )
    sc = StatsCollector(
        disable_instance=(
            not hasattr(self.options, "show_perf")
            or not self.options.show_perf
            or self.run_mode == RunMode.SERVER
        )
    )
    sc.register_report("Full time", "end", StatsOperation.SUB, "start")
    sc.register_report("Init time (incl model loading)", "init", StatsOperation.SUB, "start")
    sc.register_report("Run time (incl reading CSV)", "run", StatsOperation.SUB, "init")
    with verbose_stdout(self.options.verbose):
        sc.enable()
        try:
            sc.mark("start")

            _pipeline_executor.init_pipeline()
            self.runtime.initialization_succeeded = True
            sc.mark("init")

            _pipeline_executor.run_pipeline(cleanup=False)
            sc.mark("run")
        finally:
            _pipeline_executor.cleanup_pipeline()
            sc.mark("end")
            sc.disable()
    self.logger.info(
        "<<< Finish {} in the {} mode".format(ArgumentsOptions.MAIN_COMMAND, self.run_mode.value)
    )
    sc.print_reports()
    if self.run_mode == RunMode.SCORE:
        # print the result if no output file was provided
        if tmp_output_filename:
            print(pd.read_csv(tmp_output_filename))
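# A note on the timing reports registered above, assuming mlpiper's
# StatsOperation.SUB reports the difference between the two named marks
# (an assumption about the StatsCollector API, not confirmed here):
#
#   "Full time"                      = end  - start
#   "Init time (incl model loading)" = init - start
#   "Run time (incl reading CSV)"    = run  - init
#
# The marks are set in start -> init -> run -> end order inside the
# verbose_stdout block, so the three reports partition the full run.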