def process_updated_tasks():
    """
    Handle task status changes.
    :return:
    """
    global last_check_time
    logger.info(
        'process_updated_tasks, last check time={}'.format(last_check_time))
    # tasks = TaskOpt.get_all_need_check_task(last_time=last_check_time-timedelta(seconds=1))
    try:
        db_session = ScopedSession()
        # Query with a one-second safety margin, then only act on tasks that
        # were actually updated since the previous check.
        previous_check_time = last_check_time
        last_time = last_check_time - timedelta(seconds=1)
        tasks = db_session.query(Task.id, Task.status, Task.last_update) \
            .filter(and_(Task.status.in_(('pausing', 'running', 'cancelled')),
                         Task.last_update >= last_time)).all()
        last_check_time = datetime.now()
        for task_id, status, last_update in tasks:
            logger.info(
                'process_updated_tasks task id={}, status={}, last_update={}'.
                format(task_id, status, last_update))
            if last_update >= previous_check_time:
                if status == 'cancelled':
                    cancel_task(task_id)
                elif status == 'pausing':
                    pause_task(task_id)
                elif status == 'running':
                    resume_task(task_id)
    except Exception as e:
        logger.exception(
            'process_updated_tasks catch exception e={}'.format(e))
        db_session.rollback()
    finally:
        ScopedSession.remove()
def update_area_status():
    """
    Update each agent's busyness in real time according to the run results.
    :return:
    """
    try:
        db_session = ScopedSession()
        area_ids = db_session.query(Area.id).all()
        for area in area_ids:
            area_id = area[0]
            running_jobs_num = db_session.query(Job).filter(
                Job.area == area_id, Job.status == 'running').count()
            db_session.query(Area).filter(Area.id == area_id).update(
                {Area.running_tasks: running_jobs_num},
                synchronize_session=False)
            logger.info('update_area_status Area id={}, running jobs={}'.format(
                area_id, running_jobs_num))
        db_session.commit()
    except Exception as e:
        logger.exception('update_area_status catch exception, e={}'.format(e))
        db_session.rollback()
    finally:
        ScopedSession.remove()
def update_account_usage():
    """
    Update each account's usage state in real time according to the run results.
    :return:
    """
    account_id = None
    try:
        db_session = ScopedSession()
        running_jobs = db_session.query(
            Job.account).filter(Job.status == 'running').all()
        logger.info('update_account_usage, running jobs={}'.format(
            len(running_jobs)))
        # Count how many running jobs each account currently has
        account_usage = {}
        for account in running_jobs:
            account_id = account[0]
            if account_id in account_usage:
                account_usage[account_id] += 1
            else:
                account_usage[account_id] = 1
        for account_id, using in account_usage.items():
            db_session.query(Account).filter(Account.id == account_id).update({
                Account.using: using,
                Account.last_update: datetime.now()
            })
        db_session.commit()
    except Exception as e:
        logger.exception(
            'update_account_usage catch exception account id={}, e={}'.format(
                account_id, e))
        db_session.rollback()
    finally:
        ScopedSession.remove()
def start_task(task_id, force=False):
    """
    Start a task.
    :param task_id: task id
    :param force: force start (if True, all unfinished tasks are restarted; used on system restart)
    :return: Result with res=True on success, res=False on failure
    """
    # res = TaskOpt.get_task_status_apsid(task_id)
    try:
        db_session = ScopedSession()
        res = db_session.query(
            Task.status, Task.aps_id,
            Task.scheduler).filter(Task.id == task_id).first()
        if not res:
            logger.error(
                'start_task can not find the task id={}. '.format(task_id))
            return Result(res=False, msg='can not find the task')

        status, aps_id, scheduler = res
        # Tasks that have already succeeded, failed or been cancelled are not restarted
        if status in ['succeed', 'failed', 'cancelled']:
            logger.warning("task is finished. task id={}, status={}".format(
                task_id, status))
            return Result(res=False, msg='task is finished.')

        # In force mode, tasks in the new, pending, pausing or running state can all be started
        if force:
            # If the task has already been scheduled, remove it first (used on system restart)
            if status in ['pending', 'pausing', 'running']:
                try:
                    g_bk_scheduler.remove_job(aps_id)
                except JobLookupError:
                    logger.warning('job has been removed.')
                db_session.query(Task).filter(Task.id == task_id). \
                    update({Task.status: "new", Task.aps_id: '', Task.start_time: None,
                            Task.last_update: datetime.now()}, synchronize_session=False)
                db_session.commit()
        else:
            # In non-force mode only new tasks can be started
            if status != 'new':
                logger.warning(
                    'start_task task is not a new task, task id={} status={}. '
                    .format(task_id, status))
                return Result(res=False, msg='is not a new task')

        # Start scheduling the task
        aps_job, status_new = scheduler_task(db_session, scheduler, task_id)

        # Save the aps id to the database; it is used later to pause, resume or cancel the task
        if aps_job:
            if status_new:
                # If the scheduled job has already started executing, the task status has been
                # set to running and must not be changed back to pending
                db_session.query(Task).filter(Task.id == task_id). \
                    update({Task.status: status_new, Task.aps_id: aps_job.id,
                            Task.last_update: datetime.now()}, synchronize_session=False)
            else:
                # Never set the task status to running here
                db_session.query(Task).filter(Task.id == task_id). \
                    update({Task.aps_id: aps_job.id, Task.start_time: datetime.now(),
                            Task.last_update: datetime.now()}, synchronize_session=False)
            db_session.commit()
            logger.info(
                '----start task succeed. task id={}, aps id={}, status={}-----'
                .format(task_id, aps_job.id, status_new))
            # TaskOpt.set_task_status(None, task_id, status='pending', aps_id=aps_job.id)
        else:
            logger.error(
                'start task can not scheduler task, task id={}, status={}, scheduler={}'
                .format(task_id, status, scheduler))
            return Result(res=False, msg='scheduler task failed')
    except Exception as e:
        db_session.rollback()
        logger.exception('start_task catch exception task id={}, e={}'.format(
            task_id, e))
        return Result(res=False, msg='start task failed. id={}'.format(task_id))
    finally:
        ScopedSession.remove()
    return Result(res=True, msg='start task succeed. id={}'.format(task_id))
def update_results():
    """
    Update job status in the database from the values in the backend.
    Only jobs that have been running longer than the configured update period
    are checked each round; task status is then updated in reverse from the jobs.
    :return:
    """
    status_map = {
        'SUCCESS': 'succeed',
        'FAILURE': 'failed',
        'PENDING': 'pending',
        'RUNNING': 'running'
    }
    del_keys = []
    is_exception = False
    time_it_beg = datetime.now()
    try:
        updated_jobs_num = 0
        failed_jobs_num = 0
        succeed_jobs_num = 0
        db_session = ScopedSession()
        # Fetch jobs that started before the update period and have not finished,
        # then check redis to see whether their results are available
        job_update_period = get_task_args()['job_update_period']
        job_start_time_limit = datetime.now() - timedelta(
            seconds=job_update_period)
        need_update_jobs = db_session.query(
            Job.id, Job.track_id, Job.account, Job.start_time).filter(
                and_(Job.status == 'running',
                     Job.start_time <= job_start_time_limit)).all()
        logger.info('-------need update jobs num={}'.format(
            len(need_update_jobs)))
        for job_id, track_id, account_id, start_time in need_update_jobs:
            key_job = 'celery-task-meta-{}'.format(track_id)
            result = RedisOpt.read_backend(key=key_job)
            if result:
                dict_res = json.loads(result)
                status = status_map.get(dict_res.get('status'),
                                        dict_res.get('status'))
                job_res = dict_res.get('result', '')
                # logger.info("update_results job res={}".format(job_res))
                # Besides the celery task outcome itself, the returned result payload also matters
                if isinstance(job_res, dict):
                    if job_res.get('status', '') == 'failed':
                        status = 'failed'

                    account_status = job_res.get('account_status', '')
                    account_config = job_res.get('account_configure', {})
                    logger.info(
                        'update account account_id={}, status={}, config={}'.
                        format(account_id, account_status, account_config))
                    # Only update the account when the returned status/configure is not empty
                    if account_status and account_config:
                        db_session.query(Account).filter(
                            Account.id == account_id).update({
                                Account.status: account_status,
                                Account.configure: json.dumps(account_config),
                                Account.last_update: datetime.now()
                            })
                    elif account_status:
                        db_session.query(Account).filter(
                            Account.id == account_id).update({
                                Account.status: account_status,
                                Account.last_update: datetime.now()
                            })
                    elif account_config:
                        db_session.query(Account).filter(
                            Account.id == account_id).update({
                                Account.configure: json.dumps(account_config),
                                Account.last_update: datetime.now()
                            })

                    # Account related information is no longer stored in the job
                    if account_config:
                        job_res.pop('account_configure')
                    str_job_res = json.dumps(job_res)
                else:
                    str_job_res = str(job_res)

                db_session.query(Job).filter(Job.id == job_id).update(
                    {
                        Job.status: status,
                        Job.result: str_job_res,
                        Job.traceback: str(dict_res.get('traceback', '')),
                        Job.end_time: datetime.now()
                    },
                    synchronize_session=False)
                del_keys.append(key_job)
                updated_jobs_num += 1
                if status == 'succeed':
                    succeed_jobs_num += 1
                else:
                    failed_jobs_num += 1
            elif start_time:
                # No result in the backend yet; fail the job once it has exceeded the timeout
                job_timeout = get_task_args()['job_timeout']
                if start_time < datetime.now() - timedelta(
                        seconds=job_timeout):
                    logger.warning(
                        'job is timeout, job id={}, start time={}'.format(
                            job_id, start_time))
                    db_session.query(Job).filter(Job.id == job_id).update(
                        {
                            Job.status: 'failed',
                            Job.result: json.dumps({
                                'status': 'failed',
                                'err_msg': 'job timeout'
                            }),
                            Job.traceback: '',
                            Job.end_time: datetime.now()
                        },
                        synchronize_session=False)
                    failed_jobs_num += 1
        db_session.commit()
        logger.info(
            '-------actually update jobs num={}'.format(updated_jobs_num))
    except Exception as e:
        is_exception = True
        logger.exception(
            '--------update_results catch exception e={}'.format(e))
        db_session.rollback()
    finally:
        ScopedSession.remove()

    # Clear successfully persisted results from the cache and update the counters
    if not is_exception:
        if updated_jobs_num > 0:
            # RedisOpt.delete_backend_more(*del_keys)
            last_num = RedisOpt.read_object('total_updated_jobs_num')
            last_succeed_num = RedisOpt.read_object('succeed_jobs_num')
            last_failed_num = RedisOpt.read_object('failed_jobs_num')
            total_updated_jobs_num = updated_jobs_num + int(
                last_num) if last_num != -1 else updated_jobs_num
            succeed_jobs_num = succeed_jobs_num + int(
                last_succeed_num
            ) if last_succeed_num != -1 else succeed_jobs_num
            failed_jobs_num = failed_jobs_num + int(
                last_failed_num) if last_failed_num != -1 else failed_jobs_num
            RedisOpt.write_object(key='total_updated_jobs_num',
                                  value=total_updated_jobs_num)
            RedisOpt.write_object(key='succeed_jobs_num',
                                  value=succeed_jobs_num)
            RedisOpt.write_object(key='failed_jobs_num',
                                  value=failed_jobs_num)

    # Reverse-update task status based on the updated job status
    update_task_status()

    # Update each agent's busyness according to the run results
    update_area_status()

    # Update each account's usage state according to the run results
    update_account_usage()

    # Handle task status changes
    process_updated_tasks()

    # Start all newly created tasks
    start_all_new_tasks()

    logger.info('update results used {} seconds.'.format(
        (datetime.now() - time_it_beg).seconds))
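# Illustrative sketch (not part of the original module): one way the maintenance
# entry point above could be registered on an APScheduler background scheduler
# such as g_bk_scheduler. The helper name and the default interval are
# assumptions; the real registration and interval come from the project's own
# startup code and configuration.
def _register_update_results_example(scheduler, interval_seconds=60):
    """Register update_results as a recurring interval job (example only)."""
    # update_results() internally chains update_task_status, update_area_status,
    # update_account_usage, process_updated_tasks and start_all_new_tasks,
    # so one interval job is enough to drive the whole maintenance loop.
    scheduler.add_job(update_results,
                      trigger='interval',
                      seconds=interval_seconds,
                      id='update_results_example',
                      replace_existing=True)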
def update_task_status():
    """
    Update task status from the status of its jobs.
    :return:
    """
    try:
        db_session = ScopedSession()
        running_tasks = db_session.query(Task).filter(
            Task.status == 'running').all()
        # running_tasks = TaskOpt.get_all_running_task()
        for task in running_tasks:
            failed_counts = db_session.query(Job.status).filter(
                and_(Job.task == task.id, Job.status == 'failed')).count()
            succeed_counts = db_session.query(Job.status).filter(
                and_(Job.task == task.id, Job.status == 'succeed')).count()
            task.succeed_counts = succeed_counts
            task.failed_counts = failed_counts
            logger.info(
                '-----update_task_status task id={} status={}, succeed={}, failed={}, real accounts num={}'
                .format(task.id, task.status, task.succeed_counts,
                        task.failed_counts, task.real_accounts_num))

            # sch = SchedulerOpt.get_scheduler(task.scheduler)
            sch_mode, sch_end_date = db_session.query(Scheduler.mode, Scheduler.end_date)\
                .filter(Scheduler.id == task.scheduler).first()

            # For a one-time task, the task finishes as soon as all job results have returned
            if sch_mode in [0, 3]:
                running_jobs = db_session.query(Job.status).filter(
                    and_(Job.task == task.id,
                         Job.status == 'running')).count()
                logger.info('task id={}, running jobs={}'.format(
                    task.id, running_jobs))
                one_time_task_timeout = get_task_args(
                )['one_time_task_timeout']
                if ((task.failed_counts + task.succeed_counts) >= task.real_accounts_num) \
                        or running_jobs == 0 \
                        or (task.start_time and task.start_time < datetime.now() - timedelta(seconds=one_time_task_timeout)):
                    if task.succeed_counts >= task.limit_counts:
                        task.status = 'succeed'
                    else:
                        task.status = 'failed'
            # For a periodic task, the task ends when the succeed count reaches the required
            # maximum or the end time is reached; its aps jobs are then removed from the scheduler
            else:
                if task.succeed_counts >= task.limit_counts:
                    task.status = 'succeed'
                elif sch_end_date and datetime.now() >= sch_end_date:
                    task.status = 'failed'
                else:
                    # A periodic task may run for at most 120 days; it is closed
                    # automatically once the limit is exceeded
                    cycle_task_timeout = get_task_args()['cycle_task_timeout']
                    if task.start_time and task.start_time < datetime.now() - timedelta(
                            seconds=cycle_task_timeout):
                        task.status = 'failed'

            if task.status in ['succeed', 'failed']:
                end_t = datetime.now()
                task.end_time = end_t
                task.last_update = end_t
                # aps_id = TaskOpt.get_aps_ids_by_task_id(task.id)
                aps_id = db_session.query(
                    Task.aps_id).filter(Task.id == task.id).first()[0]
                try:
                    g_bk_scheduler.remove_job(aps_id)
                except JobLookupError:
                    logger.warning(
                        'job has been removed. aps_id={}'.format(aps_id))

            logger.info(
                'update_task_status task {} status={}, succeed={}, failed={}'
                .format(task.id, task.status, task.succeed_counts,
                        task.failed_counts))
        db_session.commit()
    except Exception as e:
        logger.exception('update_task_status catch exception e={}'.format(e))
        db_session.rollback()
    finally:
        ScopedSession.remove()
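# Illustrative sketch (assumption): named groupings for the Scheduler.mode values
# branched on in update_task_status above and in send_task_2_worker below. Only
# the grouping itself (0 and 3 are treated as one-time, 1 and 2 as periodic) is
# taken from the code; the constant names are hypothetical.
ONE_TIME_SCH_MODES = (0, 3)   # task ends once all of its job results are back
PERIODIC_SCH_MODES = (1, 2)   # task keeps producing jobs on every scheduling round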
def send_task_2_worker(task_id):
    """
    Scheduled-job callback: split a task into jobs by account and send each job
    to the most suitable queue.
    :param task_id: task id
    :return: True on success, False on failure
    """
    try:
        jobs = []
        task_running_jobs = 0
        time_it_beg = datetime.datetime.now()
        db_scoped_session = ScopedSession()
        task = db_scoped_session.query(
            Task.category, Task.configure, Task.limit_counts,
            Task.succeed_counts, Task.scheduler).filter(
                Task.id == task_id).first()
        if not task:
            logger.error(
                'send_task_2_worker can not find the task, id={}. '.format(
                    task_id))
            return False

        category, task_configure, limit_counts, succeed_counts, sch_id = task
        sch_mode = db_scoped_session.query(
            Scheduler.mode).filter(Scheduler.id == sch_id).first()

        # For periodic tasks the number of jobs produced per round is strictly limited;
        # a one-time task uses exactly as many accounts as the user specified
        if sch_mode[0] in [1, 2]:
            if limit_counts:
                # Stop producing jobs when the succeed count already exceeds the required
                # count, or succeed plus running jobs exceeds 120% of the required count
                if succeed_counts >= int(limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task already finished, task id={}, succeed jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, limit_counts))
                    return True
                task_running_jobs = db_scoped_session.query(Job).filter(
                    and_(Job.task == task_id,
                         Job.status == 'running')).count()
                if task_running_jobs + succeed_counts >= int(
                        limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task will finish, task id={}, succeed jobs({})+running jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, task_running_jobs,
                                limit_counts))
                    return True

            # If too many jobs of one task are already running, stop producing new jobs for now
            if task_running_jobs >= 10000:
                logger.warning(
                    'task({}) jobs num={} has reached jobs limit 10000'.
                    format(task_id, task_running_jobs))
                return True

        # Find the processor for this task category
        tcg = db_scoped_session.query(TaskCategory.processor).filter(
            TaskCategory.category == category).first()
        if not tcg:
            return False

        # Every task category maps to one processor
        task_processor = tcg[0]
        if not task_processor:
            logger.error(
                'Task(id={}) have no processor, ignore processing.'.format(
                    task_id))
            return False

        logger.info(
            '---------send_task_2_worker task id={}. --------'.format(task_id))

        # Find all accounts of the task
        res = db_scoped_session.query(TaskAccountGroup.account_id).filter(
            TaskAccountGroup.task_id == task_id).all()
        account_ids = [x[0] for x in res]
        accounts = db_scoped_session.query(
            Account.id, Account.status, Account.account, Account.password,
            Account.email, Account.email_pwd, Account.gender,
            Account.phone_number, Account.birthday, Account.national_id,
            Account.name, Account.active_area, Account.active_browser,
            Account.profile_path,
            Account.configure).filter(Account.id.in_(account_ids)).all()
        # agents = db_scoped_session.query(Agent.id, Agent.active_area).filter(Agent.status != -1).order_by(Agent.status).all()

        # A task has multiple accounts; split the task into jobs, one per account
        real_accounts_num = 0
        for acc in accounts:
            acc_id, status, account, password, email, email_pwd, gender, phone_number, birthday, national_id, name, \
                active_area, active_browser_id, profile_path, account_configure = acc

            if status == 'invalid':
                logger.warning(
                    'account status is invalid. task id={}, account id={}'.
                    format(task_id, acc_id))
                continue

            area = db_scoped_session.query(Area).filter(
                Area.id == active_area).first()
            queue_name = 'default'
            area_id = None
            if area:
                area_id, queue_name = area.id, area.name
            else:
                logger.warning(
                    'There is no optimal agent for task, task id={}, account id={}, account area={}'
                    .format(task_id, acc_id, active_area))

            active_browser = db_scoped_session.query(FingerPrint.value).filter(
                FingerPrint.id == active_browser_id).first()

            if get_system_args()["force_display"] == 0:
                headless = True if get_environment() == 'pro' else False
            else:
                headless = False

            # Build the parameters required to execute the job
            inputs = {
                'system': {
                    'headless': headless
                },
                'task': {
                    'task_id': task_id,
                    'configure':
                    json.loads(task_configure) if task_configure else {},
                },
                'account': {
                    'account': account,
                    'password': password,
                    'status': status,
                    'email': email,
                    'email_pwd': email_pwd,
                    'gender': gender,
                    'phone_number': phone_number,
                    'birthday': birthday,
                    'national_id': national_id,
                    'name': name,
                    'active_area': active_area,
                    'active_browser':
                    json.loads(active_browser[0]) if active_browser else {},
                    'profile_path': profile_path,
                    'configure':
                    json.loads(account_configure) if account_configure else {}
                }
            }

            celery_task_name = "tasks.tasks.{}".format(task_processor)
            real_accounts_num += 1
            track = app.send_task(celery_task_name,
                                  args=(inputs, ),
                                  queue=queue_name,
                                  routing_key=queue_name)
            logger.info(
                '-----send sub task to worker, celery task name={}, area id={}, queue={}, '
                'task id={}, account id={}, track id={}'.format(
                    celery_task_name, area_id, queue_name, task_id, acc_id,
                    track.id))

            job = Job()
            job.task = task_id
            job.account = acc_id
            job.area = area_id
            job.status = 'running'
            job.track_id = track.id
            job.start_time = datetime.datetime.now()
            jobs.append(job)

            if sch_mode[0] in [1, 2]:
                # If the jobs already running plus the jobs produced this round exceed the
                # user's required count, stop producing jobs and retry on the next scheduling cycle
                total_running_jobs = task_running_jobs + real_accounts_num
                if (limit_counts and total_running_jobs >= int(
                        limit_counts * 1.2)) or total_running_jobs >= 10000:
                    logger.warning(
                        'task({}) total running jobs num({}) is already more than limit counts({})*1.2'
                        .format(task_id, total_running_jobs, limit_counts))
                    break

        # Mark the task as running; real_accounts_num is the number of accounts the task
        # can actually use and may change from round to round as account status changes
        db_scoped_session.query(Task).filter(and_(Task.id == task_id, Task.status.in_(['new', 'pending'])))\
            .update({Task.status: "running", Task.start_time: datetime.datetime.now(),
                     Task.real_accounts_num: real_accounts_num,
                     Task.last_update: datetime.datetime.now()}, synchronize_session=False)

        if jobs:
            db_scoped_session.add_all(jobs)

        db_scoped_session.commit()
        logger.info(
            '----send_task_2_worker send task {}, produce jobs={}, used {} seconds. '
            .format(task_id, real_accounts_num,
                    (datetime.datetime.now() - time_it_beg).seconds))
    except BaseException as e:
        logger.exception(
            'send_task_2_worker exception task id={}, e={}'.format(task_id, e))
        db_scoped_session.rollback()
        return False
    finally:
        ScopedSession.remove()
    return True
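# Illustrative sketch (not part of the original module): the contract a worker-side
# processor is expected to honour. It receives the `inputs` payload built in
# send_task_2_worker and returns a dict whose 'status', 'err_msg', 'account_status'
# and 'account_configure' keys are the ones update_results reads back from the
# celery result backend. The function name and its body are assumptions; on the
# worker the real processor is registered as a celery task named
# "tasks.tasks.<processor>".
def _example_processor_sketch(inputs):
    account = inputs['account']
    headless = inputs['system']['headless']
    task_configure = inputs['task']['configure']
    try:
        # ... drive the browser for `account` according to `task_configure`
        # and `headless` here ...
        return {
            'status': 'succeed',
            'account_status': 'valid',
            'account_configure': account.get('configure', {})
        }
    except Exception as e:
        return {'status': 'failed', 'err_msg': str(e)}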