def on_finish(self, return_code):
    # Handle the case where the process has already finished but some
    # messages have not been handled yet: drain the queue one last time
    # before finishing.
    messages = self.output_queue.get_all()
    logger.info(f"Finish, check queue: {messages}, returncode={return_code}")
    for m in messages:
        self.on_receive_message(m)
    self._on_finish(return_code)
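# The drain-then-finish pattern above assumes output_queue exposes a
# non-blocking get_all(). A minimal sketch of such a queue, assuming it
# wraps the standard library's queue.Queue (the class name DrainableQueue
# is hypothetical, not part of this codebase):
import queue

class DrainableQueue:
    """Thread-safe queue that can hand back everything queued so far."""

    def __init__(self):
        self._q = queue.Queue()

    def put(self, item):
        self._q.put(item)

    def get_all(self):
        # Pop items until the queue reports empty; with a single consumer
        # this returns every message produced before the call.
        items = []
        while True:
            try:
                items.append(self._q.get_nowait())
            except queue.Empty:
                return items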
def execute(self):
    """Run the command in a subprocess, write its output to the log file,
    and wait for it to exit.

    Returns:
        The subprocess return code.
    """
    log_dir = P.dirname(self.logfile)
    if not P.exists(log_dir):
        os.makedirs(log_dir, exist_ok=True)
    logger.info(f"Run command: {self.command}, log file at: {self.logfile}")
    with open(self.logfile, 'w') as f:
        time_start = time.time()
        # 1. start process
        proc = subprocess.Popen(self.command,
                                shell=True,
                                stdout=f,
                                stderr=f,
                                bufsize=-1,
                                env=self.env)
        self.pid = proc.pid
        # 2. update status
        # r1 = Process.objects.filter(id=self.id).update(status=Process.Status.Running,
        #                                                pid=self.pid,
        #                                                returncode=proc.returncode,
        #                                                update_date_time=datetime.datetime.now())
        # if r1 < 1:  # nothing was updated
        #     raise Exception(f"Process id={self.id}, command={self.command} is running but was not created in db.")
        # 3. wait for end
        while proc.poll() is None:
            time.sleep(0.1)
        if proc.returncode != 0:
            logger.error(f"Process run failed, returncode is {proc.returncode}, the log is at: {self.logfile}")
        return proc.returncode
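# A hedged usage sketch of how execute() might be driven. The ShellJob
# class name and its constructor arguments (command, logfile, env) are
# assumptions inferred from the attributes execute() reads; adjust them
# to the real class in this codebase.
#
# job = ShellJob(command="python train.py",
#                logfile="/tmp/jobs/train.log",
#                env=os.environ.copy())
# returncode = job.execute()
# job.on_finish(returncode)  # drain any pending messages, then finalize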
load_extension = {"n_cols": df.shape[0], "n_rows": df.shape[1]} except Exception as e: load_status = JobStep.Status.Failed raise e finally: client.batch_predict_callback(portal=server_portal, dataset_name=dataset_name, model_name=model_name, batch_predict_job_name=job_name, type=PredictStepType.Load, status=load_status, took=time.time() - t_load_start, extension=load_extension) logger.info("Load dataset finished. ") # [3]. load model t_load_model_start = time.time() load_model_status = JobStep.Status.Succeed load_model_extension = None try: model_dict = client.retrieve_model(portal=server_portal, dataset_name=dataset_name, model_name=model_name) features = model_dict['inputs'] logger.info("Before cast type: ") logger.info(df.dtypes) X = dataset_util.cast_df(df, features, remove_unnecessary_cols=True) logger.info("After cast type: ")
"n_cols_used": analyzer.n_cols, "n_rows": analyzer.n_rows, "n_cols": analyzer.n_cols, } except Exception as e: load_status = JobStep.Status.Failed raise e finally: client.analyze_callback(portal=server_portal, dataset_name=dataset_name, analyze_job_name=job_name, type=AnalyzeStep.Types.Load, status=load_status, took=util.time_diff(time.time(), t), extension=load_extension) logger.info("Load dataset finished. ") # [3]. do analyze t = time.time() analyze_extension = None analyze_status = JobStep.Status.Succeed try: dataset_stats = analyzer.do_analyze_csv() hints = [] if dataset_stats.n_cols > 1000: hints.append({ "type": "Warning", "message": "More than 1,000 columns dataset requires a long time to train." })