def rpc_push_job(exec_id, interface_id, job_id, server_host, port, params_value, server_dir, server_script,
                 return_code, status, date_format='%Y%m%d', run_date=''):
    """
    Dispatch one job to the RPC execution server.

    Every parameter that is exactly ``$date`` is replaced by the data date
    before the job is sent.

    :param exec_id: execution id
    :param interface_id: task-flow id
    :param job_id: job id
    :param server_host: host name of the RPC execution server
    :param port: port of the RPC execution server
    :param params_value: comma-separated parameter string (may be empty)
    :param server_dir: script directory
    :param server_script: script to run
    :param return_code: status return code
    :param status: job status
    :param date_format: format used when the data date has to be computed
    :param run_date: data date; when empty, yesterday (T-1) formatted with
        ``date_format`` is used
    :return: ``''`` on success, otherwise the error message that was logged
    """
    try:
        # The data date substituted for `$date` defaults to T-1.
        if run_date:
            run_time = run_date
        else:
            run_time = (date.today() + timedelta(days=-1)).strftime(date_format)
        raw_params = params_value.split(',') if params_value else []
        params = [run_time if item == '$date' else item for item in raw_params]

        client = Connection(server_host, port)
        try:
            client.rpc.event_execute(
                exec_id=exec_id,
                interface_id=interface_id,
                job_id=job_id,
                server_dir=server_dir,
                server_script=server_script,
                return_code=return_code,
                params=params,
                status=status
            )
        finally:
            # Always release the connection, including when the remote call
            # fails; a failing disconnect is still reported by the handler below.
            client.disconnect()
        return ''
    except Exception:
        err_msg = 'rpc连接异常: host: %s, port: %s' % (server_host, port)
        # Log first so the traceback survives even if a DB update below raises.
        log.error(err_msg, exc_info=True)
        # Record the failure in the job execution detail log.
        EventModel.add_event_exec_detail_job(db.etl_db, exec_id, interface_id, job_id, 'ERROR', server_dir,
                                             server_script, err_msg, 3)
        # Update the database under the per-execution distributed lock.
        with MysqlLock(config.mysql.etl, 'event_lock_%s' % exec_id):
            # Mark the job as failed.
            EventModel.update_event_exec_job_status(db.etl_db, exec_id, interface_id, job_id, 'failed')
            # Mark the task flow as failed.
            EventModel.update_event_exec_interface_status(db.etl_db, exec_id, interface_id, -1)
            # Mark the execution master record as failed.
            EventModel.update_event_execute_status(db.etl_db, exec_id, -1)
        return err_msg
def get_event_job(event_id, exec_type=1, run_date='', date_format='%Y%m%d'):
    """
    Entry point of an FTP-event-triggered execution.

    Steps performed:
    1. Resolve the value substituted for ``$date``: the supplied ``run_date``,
       else the event's stored data date, else the current local date.
    2. Load the task-flow details and the FTP server configuration of the event.
    3. Connect to the server (FTP or SFTP, chosen by ``ftp_type``) and check
       whether the file named after the data date exists.
    4. Abort with a 400 ``Response`` when the server type is unknown, the
       connection/probe fails, the file is missing, or a successful execution
       is already recorded for the data date.
    5. Build the task-flow dependency data, persist the execution records and
       dispatch the level-0 jobs of every level-0 task flow over RPC.
    6. When a starting task flow has no jobs it is marked successful and the
       jobs of the following task flows are dispatched instead.

    :param event_id: event id (ftp_event_id)
    :param exec_type: execution type: 1 automatic, 2 manual
    :param run_date: manually supplied ``$date`` value in ``%Y-%m-%d`` form
    :param date_format: output format of the ``$date`` value
    :return: a ``Response`` with status 400 when the run is aborted during the
        FTP checks; otherwise ``None``
    """
    # Resolve the data date that replaces `$date` in job parameters.
    if run_date and date_format:
        run_time = time.strftime(date_format, time.strptime(run_date, '%Y-%m-%d'))
    else:
        event_detail = FtpEventModel.get_ftp_event_detail(db.etl_db, event_id)
        if event_detail and event_detail['date_time']:
            run_time = time.strftime(date_format, time.strptime(event_detail['date_time'], '%Y-%m-%d'))
        else:
            run_time = time.strftime(date_format, time.localtime())

    # Task-flow details attached to this event.
    detail_list = EventModel.get_interface_detail_by_ftp_event_id(db.etl_db, event_id)
    # FTP server configuration of this event.
    ftp_detail = FtpEventModel.get_ftp_detail_by_event_id(db.etl_db, event_id)
    # The password may come back as bytes; the link classes get it as text.
    if isinstance(ftp_detail['ftp_passwd'], bytes):
        ftp_detail['ftp_passwd'] = ftp_detail['ftp_passwd'].decode('utf-8', 'ignore')

    # Check whether the expected file exists on the server.
    try:
        if ftp_detail['ftp_type'] == 1:
            link_cls = FtpLink
        elif ftp_detail['ftp_type'] == 2:
            link_cls = SftpLink
        else:
            FtpModel.update_ftp_status(db.etl_db, ftp_detail['ftp_id'], 1)
            return Response(status=400, msg='FTP服务器类型未知')
        ftp = link_cls(ftp_detail['ftp_host'], ftp_detail['ftp_port'], ftp_detail['ftp_user'],
                       ftp_detail['ftp_passwd'])
        try:
            # Status 0 is recorded once the link object has been created.
            FtpModel.update_ftp_status(db.etl_db, ftp_detail['ftp_id'], 0)
            # The configured file name is a strftime pattern filled with the data date.
            file_name = time.strftime(ftp_detail['file_name'], time.strptime(ftp_detail['date_time'], '%Y-%m-%d'))
            result = ftp.test_file(ftp_detail['data_path'], file_name)
        finally:
            # Close the link even when the probe fails, so it is not leaked.
            ftp.close()
    except Exception:
        # Keep the traceback; the caller only receives a generic message.
        log.error('FTP连接异常: 事件id: %s' % event_id, exc_info=True)
        FtpModel.update_ftp_status(db.etl_db, ftp_detail['ftp_id'], 1)
        return Response(status=400, msg='FTP连接异常')

    # Successful execution record for the current data date, if any.
    success_detail = EventModel.get_event_exec_detail_success(db.etl_db, event_id, ftp_detail['date_time'])
    # Run only when the file exists and no successful run is recorded yet.
    # NOTE(review): the same message is returned when a successful record
    # already exists, not only when the file is missing - confirm intended.
    if not result or success_detail:
        return Response(status=400, msg='FTP文件目录不存在')

    interface_dag_nodes = {}
    for detail in detail_list:
        # Upstream/downstream dependencies of the task flow.
        dag = generate_interface_dag_by_event(detail)
        # Tree relation of the task flow; its keys are flagged in the dag.
        tree = generate_interface_tree_by_event(detail)
        for key in tree.keys():
            dag[key]['is_tree'] = 1
        interface_dag_nodes.update(dag)
    if not interface_dag_nodes:
        return

    # Task flows that have to be executed (those flagged above).
    interface_tree_nodes = {key: value for key, value in interface_dag_nodes.items()
                            if value.get('is_tree', 0) == 1}
    # Job details of every task flow to execute, keyed by task-flow id.
    job_nodes = {item['id']: generate_job_dag_by_interface(item['id']) for item in interface_tree_nodes.values()}
    # Persist the execution master record, task-flow records and job records.
    exec_id = add_event_exec_record(event_id, interface_dag_nodes, job_nodes, exec_type, run_time, date_format)
    # Starting task flows are those at level 0.
    start_interface = [key for key, item in interface_tree_nodes.items() if item['level'] == 0]

    # Becomes True when at least one starting task flow has no jobs.
    flag = False
    for curr_interface in start_interface:
        start_jobs = job_nodes[curr_interface]
        if not start_jobs:
            # A task flow without jobs is treated as already finished.
            flag = True
            log.info('事件任务流中任务为空: 事件id: %s, 执行id: %s, 任务流id: %s' % (event_id, exec_id, curr_interface))
            # Mark the task flow as successful.
            with MysqlLock(config.mysql.etl, 'event_lock_%s' % exec_id):
                EventModel.update_event_exec_interface_status(db.etl_db, exec_id, curr_interface, 0)
            continue
        # Mark the task flow as running.
        with MysqlLock(config.mysql.etl, 'event_lock_%s' % exec_id):
            EventModel.update_event_exec_interface_status(db.etl_db, exec_id, curr_interface, 1)
        # Dispatch the level-0 jobs of this task flow over RPC.
        for job in start_jobs:
            if job['level'] != 0:
                continue
            # Mark the job as running.
            with MysqlLock(config.mysql.etl, 'event_lock_%s' % exec_id):
                EventModel.update_event_exec_job_status(db.etl_db, exec_id, curr_interface, job['id'], 'running')
            log.info('事件分发任务: 执行id: %s, 任务流id: %s, 任务id: %s' % (exec_id, curr_interface, job['id']))
            rpc_push_job(exec_id, curr_interface, job['id'], job['server_host'], config.exec.port,
                         ','.join(job['params_value']), job['server_dir'], job['server_script'],
                         job['return_code'], job['status'], run_date=run_time)

    # Continue with the next task flows when an empty starting flow was finished above.
    if flag:
        next_jobs = continue_event_execute_interface(exec_id, exec_type=exec_type, run_date=run_time)
        if not next_jobs:
            return
        for interface_id, item in next_jobs.items():
            for job_id in set(item['job_id']):
                log.info('分发任务: 执行id: %s, 任务流id: %s, 任务id: %s' % (exec_id, interface_id, job_id))
                nodes = item['nodes']
                # NOTE(review): params_value is passed through unjoined here,
                # unlike the dispatch above - presumably already a string; confirm.
                rpc_push_job(exec_id, interface_id, job_id, nodes[job_id]['server_host'], config.exec.port,
                             nodes[job_id]['params_value'], nodes[job_id]['server_dir'],
                             nodes[job_id]['server_script'], nodes[job_id]['return_code'],
                             nodes[job_id]['status'], run_date=run_time)