示例#1
0
    def create_instance(self, cron_conf, etl_day):
        """
        Create the task instances triggered by one cron configuration.

        A cron log record is created first. Then, depending on ``cron_conf``:

        * task id equals ``State.ROOT_TASK``: job task instances are created
          for every valid job definition; nothing is pushed to the queue.
        * type equals ``State.TASK_CRON``: cron task instances are created
          and a ``TaskBeginMsg`` is pushed to the dispatcher topic for each.
        * anything else: the problem is logged, the cron log is marked
          ``State.SHUTDOWN`` and the method returns early.

        On the two supported paths the cron log is finally marked
        ``State.SUCCESS``; a push error is only logged, it does not change
        that status.

        :param cron_conf: cron configuration (reads task_id, name, type)
        :param etl_day: execute date for the created instances
        :return: None
        """
        cron_log = models.CronLog().create_cron_log(cron_conf, etl_day)

        is_root = cron_conf.task_id == State.ROOT_TASK
        # Guard clause: neither the root task nor a cron task -> give up.
        if not is_root and not cron_conf.type == State.TASK_CRON:
            reason = "task_id:{} name:{} type:{}, unknow task type".format(
                cron_conf.task_id, cron_conf.name, cron_conf.type)
            log.logger.info(reason)
            cron_log.update_cron_status(State.SHUTDOWN)
            return

        if is_root:
            valid_jobs = models.TaskDefine().get_valid_job()
            models.TaskInstance().create_job_task_instance(
                execute_date=etl_day, job_list=valid_jobs)
            # the root task creates instances only, no dispatch message
            to_dispatch = []
        else:
            to_dispatch = models.TaskInstance().create_cron_task_instance(
                execute_date=etl_day, cron_list=[cron_conf])

        for created in to_dispatch:
            begin_msg = kafka_utils.TaskBeginMsg(
                instance_id=created.id,
                task_id=created.task_id,
                execute_date=created.etl_day)
            push_err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER, begin_msg)
            log.logger.info("push task to queue, instance {}, err {}".format(
                created, push_err))

        cron_log.update_cron_status(State.SUCCESS)
示例#2
0
 def run_all_job(self, date_list):
     """
     Create job task instances for every valid job on every given date.

     The valid job definitions are fetched once and reused for each date.
     For each date the job count and the date are printed before the
     instances are created.

     :param date_list: dates to create instances for
     :return: summary message with (number of dates * number of jobs)
     """
     valid_jobs = models.TaskDefine().get_valid_job()
     for day in date_list:
         print(len(valid_jobs), day)
         models.TaskInstance().create_job_task_instance(day, valid_jobs)
     total = len(date_list) * len(valid_jobs)
     return "generate {} job task instance ".format(total)
示例#3
0
    def rerun_task(self,
                   task_id,
                   date_list,
                   up_and_down=False,
                   run_up=False,
                   run_down=False,
                   force=False):
        """
        Re-run a task for every date in ``date_list``.

        ``task_id`` is first looked up as a job definition; only when no job
        definition is found is it looked up as a cron task.

        For a job, the first direction flag that is set wins, checked in the
        order ``run_up``, ``run_down``, ``up_and_down``. When none is set and
        ``force`` is true, the job is run through
        ``direct_run_single_job_task`` and the returned instances are pushed
        to the dispatcher topic. In every other job case, job task instances
        are created for the selected dependency jobs plus the job itself
        (``force`` is ignored when a direction flag is set).

        For a cron task, cron task instances are created and pushed to the
        dispatcher topic.

        :param task_id: task_id
        :param date_list: dates to re-run
        :param up_and_down: also run all upstream and downstream jobs
        :param run_up: also run all upstream jobs
        :param run_down: also run all downstream jobs
        :param force: run the single job directly (no direction flag set)
        :return: summary message; empty string when nothing was generated
        """
        kafka_utils.setup_kafka(config.G_Conf.Common.Broker)
        instance_list = []
        msg = ""
        run_type = ""

        job = models.TaskDefine().get_job_by_task_id(task_id_list=[task_id])
        if job:
            run_type = "job"
            if force and not (run_up or run_down or up_and_down):
                # run a job with force
                for etl_day in date_list:
                    created = models.TaskInstance(
                    ).direct_run_single_job_task(etl_day, job)
                    # Truthiness check instead of comparing the returned
                    # sequence with 0, which raises TypeError on Python 3.
                    if created:
                        instance_list.extend(created)
            else:
                job_list = []
                if run_up:
                    job_list = models.TaskDependency().get_all_upstream(
                        task_id)
                elif run_down:
                    job_list = models.TaskDependency().get_all_downstream(
                        task_id)
                elif up_and_down:
                    up_job = models.TaskDependency().get_all_upstream(task_id)
                    down_job = models.TaskDependency().get_all_downstream(
                        task_id)
                    job_list.extend(up_job)
                    job_list.extend(down_job)
                # no flag at all: run the single job, waiting on dependency

                # add self
                job_list.append(task_id)
                need_run_job_list = models.TaskDefine().get_job_by_task_id(
                    task_id_list=job_list)
                if need_run_job_list:
                    for etl_day in date_list:
                        models.TaskInstance().create_job_task_instance(
                            execute_date=etl_day, job_list=need_run_job_list)
                    msg = "generate {} TaskDefine task instance ".format(
                        len(need_run_job_list) * len(date_list))
        else:
            # not a job definition, try it as a cron task
            cron = models.CronConf().get_cron_by_task_id(task_id=task_id)
            if cron:
                run_type = "cron"
                for etl_day in date_list:
                    created = models.TaskInstance().create_cron_task_instance(
                        execute_date=etl_day, cron_list=[cron])
                    if created:
                        instance_list.extend(created)

        # Only the forced-job and cron paths fill instance_list.
        for instance in instance_list:
            err = kafka_utils.PushMsgWithRetry(
                kafka_utils.TOPIC_DISPATCHER,
                kafka_utils.TaskBeginMsg(instance_id=instance.id,
                                         task_id=instance.task_id,
                                         execute_date=instance.etl_day))
            log.logger.info("push task to queue, instance {}, err {}".format(
                instance, err))
        if instance_list:
            msg = "generate {} {} task instance ".format(
                len(instance_list), run_type)
        return msg
示例#4
0
    def run(self):
        """
        Run ``self.instance`` with retries and alert when it does not finish
        cleanly.

        The loop makes at most ``self.retry + 1`` attempts. Each attempt
        first calls ``start_running``; a non-None result is logged and ends
        the loop without an alert (presumably a reason why the instance
        must not run -- confirm against ``start_running``). Otherwise
        ``inner_run`` is called: ``None`` means success, any other value is
        treated as the error description of that attempt.

        ``msg`` is the alert switch: it is ``None`` on the clean exits and
        holds an error text otherwise. The ``finally`` block always releases
        the process reference counter and, when ``msg`` is set, logs the
        error and sends an alert.

        :return: None
        """
        # number of attempts made so far
        running_times = 0
        msg = None
        try:
            while running_times <= self.retry:
                task_runner = BashTaskRunner(self.instance)
                self.begin_time = time.time()
                self.instance.worker_retry = running_times
                # submit the running state; every attempt after the first
                # is flagged as a retry
                should_run = self.instance.start_running(
                    retry=(True if running_times > 0 else False))
                if should_run is not None:
                    log.logger.info("{}".format(should_run))
                    # clear msg so that no alert is sent for this exit
                    msg = None
                    break
                ret = self.inner_run(task_runner, running_times)
                if ret is None:
                    # success: record the state and publish the result
                    self.instance.stop_running(State.SUCCESS)
                    kafka_utils.PushMsgWithRetry(
                        kafka_utils.TOPIC_TASK_RESULT,
                        kafka_utils.TaskOverMsg(
                            instance_id=self.instance.id,
                            task_id=self.instance.task_id,
                            status=State.SUCCESS,
                            execute_date=self.instance.etl_day))
                    msg = None
                    break
                else:
                    msg = "the {} times running:{}".format(running_times, ret)
                    if self.instance.status == State.KILLED:
                        # a killed instance is not retried; msg stays set,
                        # so the alert in ``finally`` still fires
                        break
                    elif self.instance.status == State.TIMEOUT:
                        self.instance.stop_running(State.TIMEOUT)
                    else:
                        self.instance.stop_running(State.FAILED)

                    # announce the next attempt only if one is left
                    if running_times < self.retry:
                        msg = "{}, after {} seconds will try the {} times ".format(
                            msg, self.step_seconds * (running_times + 1),
                            running_times + 1)
                    log.logger.error(msg)
                running_times += 1
                # wait before the next attempt; the delay grows linearly
                # with the attempt number
                if running_times <= self.retry:
                    time.sleep(self.step_seconds * running_times)
            else:
                # reached only when the loop condition fails without a
                # ``break``, i.e. no attempt ended the loop early
                msg = "reach the max retry times {} with err:{}, stop running".format(
                    self.retry, msg)
                log.logger.info(msg)

        except Exception as e:
            # any exception becomes the error text for the alert below
            msg = "get Exception {}.{}".format(type(e), str(e))
            log.logger.error(msg)
        finally:
            # always release the reference, whatever the outcome
            process_utils.ref_counter.unref()
            if msg is not None:
                # label naming the task in the alert; depends on task type
                keeper = "unknown"
                log.logger.error("run {}, err: {}".format(self.instance, msg))
                if self.instance.task_type == State.TASK_JOB:
                    job_list = models.TaskDefine().get_job_by_task_id(
                        [self.instance.task_id])
                    if len(job_list) > 0:
                        keeper = "{}({})".format(self.instance.task_id,
                                                 job_list[0].keeper)
                    else:
                        # should not come here
                        keeper = "{}".format(self.instance.task_id)
                elif self.instance.task_type == State.TASK_EXTRACT:
                    keeper = "{}(rule_id:{})".format(self.instance.task_id,
                                                     self.instance.sub_task_id)
                elif self.instance.task_type == State.TASK_CRON:
                    keeper = "{}(定时任务)".format(self.instance.task_id)
                else:
                    pass
                msg = "\nTask: {} \nError: {} \nContext: {}".format(
                    keeper, msg, self.instance)
                process_utils.Alert(msg)
        return