Example #1
    def on_finish(self, return_code):
        # Handle the case where the process has already finished but messages remain unhandled; do one last check before finishing
        messages = self.output_queue.get_all()
        logger.info(
            f"Finish, check queue: {messages}, returncode={return_code}")
        for m in messages:
            self.on_receive_message(m)

        self._on_finish(return_code)
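
`output_queue.get_all()` is not defined in this snippet; presumably it drains every buffered message in one non-blocking call. A minimal sketch of such a wrapper, assuming a plain `queue.Queue` underneath (the class and method names here are this sketch's assumptions, not confirmed by the snippet):

import queue

class OutputQueue:
    """Hypothetical queue wrapper with a drain-everything helper."""

    def __init__(self):
        self._q = queue.Queue()

    def put(self, message):
        self._q.put(message)

    def get_all(self):
        # Drain without blocking: stop as soon as the queue is empty.
        messages = []
        while True:
            try:
                messages.append(self._q.get_nowait())
            except queue.Empty:
                return messages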
Example #2
    def execute(self):
        """ Process body

        Args:
            in_queue: blocking queue
            out_queue:

        Returns:

        """

        log_dir = P.dirname(self.logfile)
        os.makedirs(log_dir, exist_ok=True)

        logger.info(f"Run command{self.command}, log file at: {self.logfile}")

        with open(self.logfile, 'w') as f:
            time_start = time.time()
            # 1. start process
            proc = subprocess.Popen(self.command,
                                    shell=True,
                                    stdout=f,
                                    stderr=f,
                                    bufsize=-1,
                                    env=self.env)
            self.pid = proc.pid

            # 2. update status
            # r1 = Process.objects.filter(id=self.id).update(status=Process.Status.Running,
            #                                                pid=self.pid,
            #                                                returncode=proc.returncode,
            #                                                update_date_time=datetime.datetime.now())
            # if r1 < 1:  # update nothing
            #     raise Exception(f"Process id={self.id}, command={self.command} is running but was not created in the db.")

            # 3. wait for end
            while proc.poll() is None:
                time.sleep(0.1)
            if proc.returncode != 0:
                logger.error(
                    f"Process failed with returncode {proc.returncode}; see the log at: {self.logfile}"
                )

            returncode = proc.returncode

        return returncode
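
The pattern above (redirect stdout/stderr to a log file, poll until exit, return the code) can be exercised standalone. A sketch under the assumption that `command`, `logfile`, and `env` are the only inputs `execute` depends on; `run_logged` is a hypothetical name, not part of the project:

import os
import subprocess
import time
from os import path as P

def run_logged(command, logfile, env=None):
    # Same flow as execute(): make the log dir, spawn the shell command,
    # poll until it exits, and hand back the return code.
    os.makedirs(P.dirname(logfile), exist_ok=True)
    with open(logfile, 'w') as f:
        proc = subprocess.Popen(command, shell=True,
                                stdout=f, stderr=f,
                                bufsize=-1, env=env)
        while proc.poll() is None:
            time.sleep(0.1)
    return proc.returncode

if __name__ == "__main__":
    print(run_logged("echo hello", "/tmp/process_demo/echo.log"))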
Example #3
    load_extension = {"n_cols": df.shape[1], "n_rows": df.shape[0]}

except Exception:
    load_status = JobStep.Status.Failed
    raise
finally:
    client.batch_predict_callback(portal=server_portal,
                                  dataset_name=dataset_name,
                                  model_name=model_name,
                                  batch_predict_job_name=job_name,
                                  type=PredictStepType.Load,
                                  status=load_status,
                                  took=time.time() - t_load_start,
                                  extension=load_extension)
    logger.info("Load dataset finished. ")

# [3]. load model
t_load_model_start = time.time()
load_model_status = JobStep.Status.Succeed
load_model_extension = None
try:
    model_dict = client.retrieve_model(portal=server_portal,
                                       dataset_name=dataset_name,
                                       model_name=model_name)
    features = model_dict['inputs']

    logger.info("Before cast type: ")
    logger.info(df.dtypes)
    X = dataset_util.cast_df(df, features, remove_unnecessary_cols=True)
    logger.info("After cast type: ")
Example #4
        "n_cols_used": analyzer.n_cols,
        "n_rows": analyzer.n_rows,
        "n_cols": analyzer.n_cols,
    }
except Exception:
    load_status = JobStep.Status.Failed
    raise
finally:
    client.analyze_callback(portal=server_portal,
                            dataset_name=dataset_name,
                            analyze_job_name=job_name,
                            type=AnalyzeStep.Types.Load,
                            status=load_status,
                            took=util.time_diff(time.time(), t),
                            extension=load_extension)
    logger.info("Load dataset finished. ")

# [3]. do analyze
t = time.time()
analyze_extension = None
analyze_status = JobStep.Status.Succeed
try:
    dataset_stats = analyzer.do_analyze_csv()
    hints = []
    if dataset_stats.n_cols > 1000:
        hints.append({
            "type": "Warning",
            "message": "A dataset with more than 1,000 columns requires a long time to train."
        })
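
Examples #3 and #4 repeat the same step-reporting shape: default the status to Succeed, flip it to Failed in `except`, and always fire the callback with the status and elapsed time in `finally`. A distilled sketch of that pattern (the `report` signature is illustrative, not the client API):

import time

def timed_step(fn, report, **report_kwargs):
    # Run one job step; report status and duration whether it succeeds or not.
    t_start = time.time()
    status = "Succeed"
    try:
        return fn()
    except Exception:
        status = "Failed"
        raise
    finally:
        report(status=status, took=time.time() - t_start, **report_kwargs)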