def run(self): """ 阻塞方式找到任务,并自动调用""" # 如果上次有任务在运行还没结束,重新执行 jobs = ztq_core.get_job_state(CONFIG['server']['alias']) if self.name in jobs: self.start_job(jobs[self.name]) # 队列批处理模式 # batch_size: 批处理的阀值,达到这个阀值,就执行一次batch_func # batch_func: # 1, 执行一批batch_size 大小的任务后,后续自动执行这个方法方法 # 2, 执行一批小于batch_size 大小的任务后,再得不到任务,后续自动执行这个方法 batch_config = CONFIG.get("batch_queue", {}).get(self.queue_name, {}) batch_size = batch_config.get('batch_size', None) or -1 batch_func = batch_config.get('batch_func', None) or (lambda *args, **kw: -1) run_job_index = 0 queue_tiemout = QUEUE_TIMEOUT # 循环执行任务 while not self._stop: try: task = ztq_core.pop_task( self.queue_name, timeout=queue_tiemout, from_right=self.from_right ) except ztq_core.ConnectionError, e: logger.error('ERROR: redis connection error: %s' % str(e)) time.sleep(3) continue except ztq_core.ResponseError, e: logger.error('ERROR: redis response error: %s' % str(e)) time.sleep(3) continue
def stop_working_job(request):
    """Stop a conversion job that is currently in progress.

    Reads ``id`` (worker), ``thread`` and ``pid`` from the route match.
    A pid of ``'-1'`` cannot be killed, so the job is marked as manually
    stopped, pushed to the runtime-error queue and removed from the worker's
    job state.  Otherwise a ``kill`` command is pushed to the worker's
    command queue unless one for the same pid is already queued.

    Always returns a redirect to ``/workerstatus``.
    """
    route_args = request.matchdict
    worker_id = route_args['id']
    thread = route_args['thread']
    thread_pid = route_args['pid']

    # A pid of -1 cannot be killed: just record the job as stopped by hand.
    if thread_pid == '-1':
        jobs = ztq_core.get_job_state(worker_id)
        task = jobs[thread]
        runtime = task['runtime']
        runtime['reason'] = "manual stopped"
        runtime['end'] = int(time.time())
        ztq_core.push_runtime_error(runtime['queue'], task)
        del jobs[thread]
        return HTTPFound(location='/workerstatus')

    kill_command = {
        'command': 'kill',
        'timestamp': int(time.time()),
        'pid': thread_pid,
    }
    cmd_queue = ztq_core.get_command_queue(worker_id)

    # Avoid sending several kill commands for the same pid.
    already_queued = bool(cmd_queue) and any(
        queued.get('pid', None) == kill_command['pid'] for queued in cmd_queue
    )
    if not already_queued:
        cmd_queue.push(kill_command)
    return HTTPFound(location='/workerstatus')
def stop_working_job(request):
    """Stop a conversion job that is currently in progress.

    Reads ``id`` (worker), ``thread`` and ``pid`` from the route match.

    * When the pid is -1 the job cannot be killed: it is marked as
      "manual stopped", pushed to the runtime-error queue and removed from
      the worker's job state.
    * Otherwise a ``kill`` command is pushed to the worker's command queue,
      unless a command carrying the same pid is already queued.

    Always returns a redirect to ``/workerstatus``.
    """
    # Values taken from the URL.
    worker_id = request.matchdict['id']
    thread = request.matchdict['thread']
    thread_pid = request.matchdict['pid']

    # A pid of -1 cannot be killed.  Values matched from the URL arrive as
    # strings, so comparing against the integer -1 alone never matched and a
    # kill command for pid "-1" was queued instead; accept both spellings.
    if thread_pid in (-1, '-1'):
        jobs = ztq_core.get_job_state(worker_id)
        task = jobs[thread]
        task['runtime']['reason'] = "manual stopped"
        task['runtime']['end'] = int(time.time())
        ztq_core.push_runtime_error(task['runtime']['queue'], task)
        del jobs[thread]
        return HTTPFound(location='/workerstatus')

    kill_command = {
        'command': 'kill',
        'timestamp': int(time.time()),
        'pid': thread_pid,
    }
    cmd_queue = ztq_core.get_command_queue(worker_id)

    # Avoid sending several kill commands for the same pid.
    if cmd_queue:
        for command in cmd_queue:
            if command.get('pid', None) == kill_command['pid']:
                return HTTPFound(location='/workerstatus')
    cmd_queue.push(kill_command)
    return HTTPFound(location='/workerstatus')
def run(self):
    """Block on this thread's queue and run every task that arrives.

    Before looping, a job still recorded for this thread name (left over
    from a previous run) is executed again.  The loop ends once
    ``self._stop`` becomes true.
    """
    unfinished = ztq_core.get_job_state(safe_get_host('server', 'alias'))
    if self.name in unfinished:
        self.start_job(unfinished[self.name])

    # Batch mode for this queue:
    #   batch_size - threshold; batch_func runs once every batch_size tasks
    #   batch_func - also runs when a partial batch was processed and the
    #                queue then yields no further task
    queue_conf = CONFIG.get("batch_queue", {}).get(self.queue_name, {})
    batch_size = queue_conf.get('batch_size', None) or -1
    batch_func = queue_conf.get('batch_func', None) or (lambda *args, **kw: -1)

    done_in_batch = 0
    pop_timeout = QUEUE_TIMEOUT

    while not self._stop:
        try:
            task = ztq_core.pop_task(
                self.queue_name,
                timeout=pop_timeout,
                from_right=self.from_right,
            )
        except ztq_core.ConnectionError:
            print('ERROR: Not connected the server\n')
            task = None
            time.sleep(3)

        if task is None:
            # Nothing more to fetch: close a partially processed batch.
            if done_in_batch > 0:
                done_in_batch = 0
                pop_timeout = QUEUE_TIMEOUT
                batch_func()
            continue

        self.start_job(task)

        if batch_size > 0:
            if done_in_batch < batch_size - 1:
                # Still inside a batch: count the task and switch the pop
                # timeout to -1 for the next fetch.
                done_in_batch += 1
                pop_timeout = -1
            else:
                # A full batch has been processed.
                done_in_batch = 0
                pop_timeout = QUEUE_TIMEOUT
                batch_func()

        if self.sleep_time:
            time.sleep(self.sleep_time)
def run(self):
    """Wait (blocking) for tasks on the queue and execute them one by one.

    A job left recorded under this thread's name by a previous run is
    executed first.  The loop keeps going until ``self._stop`` is set.
    """
    leftover_jobs = ztq_core.get_job_state(safe_get_host('server', 'alias'))
    if self.name in leftover_jobs:
        self.start_job(leftover_jobs[self.name])

    # Per-queue batch settings:
    #   batch_size: number of tasks after which batch_func is invoked
    #   batch_func: invoked after each full batch, and also after a partial
    #               batch once no further task can be fetched
    settings = CONFIG.get("batch_queue", {}).get(self.queue_name, {})
    batch_size = settings.get('batch_size', None) or -1
    batch_func = settings.get('batch_func', None) or (lambda *args, **kw: -1)

    processed = 0
    timeout = QUEUE_TIMEOUT

    while not self._stop:
        try:
            task = ztq_core.pop_task(self.queue_name,
                                     timeout=timeout,
                                     from_right=self.from_right)
        except ztq_core.ConnectionError:
            print('ERROR: Not connected the server\n')
            task = None
            time.sleep(3)

        if task is not None:
            self.start_job(task)

            if batch_size > 0:
                batch_complete = processed >= batch_size - 1
                if batch_complete:
                    # Finished a whole batch: reset and run batch_func.
                    processed = 0
                    timeout = QUEUE_TIMEOUT
                    batch_func()
                else:
                    processed += 1
                    timeout = -1

            if self.sleep_time:
                time.sleep(self.sleep_time)
        elif processed > 0:
            # No follow-up task: flush the partially processed batch.
            processed = 0
            timeout = QUEUE_TIMEOUT
            batch_func()
def config_worker(request):
    """Apply a management action to one worker, then redirect.

    The action comes from the ``action`` request parameter and the worker
    from the ``id`` route value:

    * ``stop_worker`` - set the worker's weight to 0
    * ``enable``      - set the worker's weight to 5
    * ``worker_down`` - lower the weight by one, never below 1
    * ``worker_up``   - raise the weight by one, never above 10
    * ``delete``      - drop the worker's weight, state and job records and
                        save the dispatcher config without re-dispatching
    * ``update``      - send a ``report`` command to the worker

    Any other path saves the dispatcher config and runs a dispatch.
    Always returns a redirect to ``/workerstatus``.
    """
    url_action = request.params.get('action', '')
    dispatcher_config = ztq_core.get_dispatcher_config()
    worker_weight = dispatcher_config['worker_weight']
    # Worker the user wants to act on.
    worker_id = request.matchdict['id']

    # Adjust the weight according to the requested action.
    if url_action == 'stop_worker':
        # Stop the worker.
        worker_weight[worker_id] = 0
    elif url_action == 'enable':
        # Enable the worker.
        worker_weight[worker_id] = 5
    elif url_action == 'worker_down':
        # Lower the weight.  A worker without a weight entry (not enabled
        # yet) is treated as weight 0 instead of raising KeyError.
        worker_weight[worker_id] = max(worker_weight.get(worker_id, 0) - 1, 1)
    elif url_action == 'worker_up':
        # Raise the weight, with the same default for unknown workers.
        worker_weight[worker_id] = min(worker_weight.get(worker_id, 0) + 1, 10)
    elif url_action == 'delete':
        # Delete a worker; this does not trigger a dispatch.
        if worker_id in worker_weight:
            worker_weight.pop(worker_id)
        workers_dict = ztq_core.get_worker_state()
        del workers_dict[worker_id]
        worker_job = ztq_core.get_job_state(worker_id)
        for job_name, _job_status in worker_job.items():
            del worker_job[job_name]
        ztq_core.set_dispatcher_config(dispatcher_config)
        return HTTPFound(location='/workerstatus')
    elif url_action == 'update':
        # Ask the worker to report its own state through its command queue.
        worker_list = ztq_core.get_all_worker()
        for worker_name in worker_list:
            if worker_name == worker_id:
                utils.send_command(worker_name, 'report')
                # Pause for a second before redirecting.
                time.sleep(1)
                return HTTPFound(location='/workerstatus')

    # Save the dispatch policy and dispatch.
    ztq_core.set_dispatcher_config(dispatcher_config)
    utils.dispatch()
    return HTTPFound(location='/workerstatus')
def config_worker(request):
    """Manage the configuration of a single worker.

    ``action`` (request parameter) selects what happens to the worker named
    by the ``id`` route value:

    * ``stop_worker`` / ``enable`` set the weight to 0 / 5
    * ``worker_down`` / ``worker_up`` move the weight by one step, kept
      within 1..10
    * ``delete`` removes the worker's weight, state and job records and
      saves the dispatcher config without dispatching
    * ``update`` sends the worker a ``report`` command

    Unless a branch returns early, the dispatcher config is saved and a
    dispatch is run.  The response is always a redirect to
    ``/workerstatus``.
    """
    url_action = request.params.get('action', '')
    dispatcher_config = ztq_core.get_dispatcher_config()
    worker_weight = dispatcher_config['worker_weight']
    # Worker targeted by the request.
    worker_id = request.matchdict['id']

    # Weight adjustments, by action.
    if url_action == 'stop_worker':
        worker_weight[worker_id] = 0
    elif url_action == 'enable':
        worker_weight[worker_id] = 5
    elif url_action in ('worker_down', 'worker_up'):
        # Workers that were never enabled have no weight entry; read them
        # as 0 so the request does not fail with KeyError.
        current = worker_weight.get(worker_id, 0)
        if url_action == 'worker_down':
            worker_weight[worker_id] = max(current - 1, 1)
        else:
            worker_weight[worker_id] = min(current + 1, 10)
    elif url_action == 'delete':
        # Deleting a worker does not cause a dispatch.
        if worker_id in worker_weight:
            worker_weight.pop(worker_id)
        workers_dict = ztq_core.get_worker_state()
        del workers_dict[worker_id]
        worker_job = ztq_core.get_job_state(worker_id)
        for job_name, _job_status in worker_job.items():
            del worker_job[job_name]
        ztq_core.set_dispatcher_config(dispatcher_config)
        return HTTPFound(location='/workerstatus')
    elif url_action == 'update':
        # Send a report command so the worker reports its own state.
        worker_list = ztq_core.get_all_worker()
        for worker_name in worker_list:
            if worker_name == worker_id:
                utils.send_command(worker_name, 'report')
                time.sleep(1)
                return HTTPFound(location='/workerstatus')

    # Store the updated dispatch policy and dispatch.
    ztq_core.set_dispatcher_config(dispatcher_config)
    utils.dispatch()
    return HTTPFound(location='/workerstatus')
def report_job(pid=-1, comment='', **kw):
    """Report information about the conversion process of the current job.

    Does nothing when the calling thread has no ``job`` attached to
    ``thread_context``.  Otherwise the job's ``process`` record is updated
    with ``pid``, the current time as ``start``, ``comment`` and any extra
    keyword arguments, and the job is written back to the job state of its
    worker under its thread name.
    """
    # Not running inside a job thread: nothing to report.
    if not hasattr(thread_context, 'job'):
        return
    job = thread_context.job

    # Collect the progress fields; extra keywords are applied last.
    details = {'pid': pid, 'start': int(time.time()), 'comment': comment}
    details.update(kw)
    job['process'].update(details)

    # Persist the refreshed job record.
    runtime = job['runtime']
    worker_jobs = ztq_core.get_job_state(runtime['worker'])
    worker_jobs[runtime['thread']] = job
def get_worker_list():
    """Yield each worker's status dict, decorated with display fields.

    Fields added to every worker status: ``_worker_name``, ``_started``,
    ``_timestamp``, ``_worker_weight``, ``_active`` and ``_threads``.
    ``_threads`` is a list of the worker's job records, each extended with
    ``_detail``, ``_name``, ``_comment``, ``_pid``, ``_take_time`` and,
    when a traceback exists for the thread ident, ``_thread_detail``.
    """
    weights = ztq_core.get_dispatcher_config()['worker_weight']

    for name, status in ztq_core.get_worker_state().items():
        status['_worker_name'] = name
        status['_started'] = datetime.datetime.fromtimestamp(status['started'])
        status['_timestamp'] = datetime.datetime.fromtimestamp(status['timestamp'])
        weight = weights.get(name, 0)
        status['_worker_weight'] = weight

        # A non-empty command queue means the worker is not consuming its
        # commands, so it is reported as being down.
        if ztq_core.get_command_queue(name):
            activity = u'shutdown'
        elif weight == 0:
            activity = u'ldle'
        else:
            activity = u'work'
        status['_active'] = activity

        # One entry per job thread of this worker.
        job_state = ztq_core.get_job_state(name)
        threads = []
        status['_threads'] = threads
        for thread_name, info in job_state.items():
            # Formatted first, so the dump holds only the raw record.
            info['_detail'] = pprint.pformat(info)
            info['_name'] = thread_name
            kw = info['kw']
            process = info['process']
            info['_comment'] = kw.get('comment', process.get('comment', ''))
            info['_pid'] = process.get('pid', -1)

            ident = unicode(process.get('ident', -1))
            tracebacks = status['traceback']
            if ident in tracebacks:
                info['_thread_detail'] = pprint.pformat(tracebacks[ident])

            # How long the task has been running.
            elapsed = int(time.time()) - process['start']
            if elapsed > 3600:
                elapsed = u'%.2f小时' % (elapsed / 3600.0)
            elif elapsed > 60:
                elapsed = u'%.2f分钟' % (elapsed / 60.0)
            info['_take_time'] = elapsed

            threads.append(info)

        yield status
def get_worker_list():
    """Generate the status dict of every worker, with display fields added.

    Each yielded dict gains ``_worker_name``, ``_started``, ``_timestamp``,
    ``_active`` and ``_threads``.  ``_threads`` lists the worker's job
    records, each extended with ``_detail``, ``_name``, ``_comment``,
    ``_pid``, ``_take_time`` and, when a traceback exists for the thread
    ident, ``_thread_detail``.
    """
    from_ts = datetime.datetime.fromtimestamp

    for name, status in ztq_core.get_worker_state().items():
        status['_worker_name'] = name
        status['_started'] = from_ts(status['started'])
        status['_timestamp'] = from_ts(status['timestamp'])

        # Pending commands mean the worker is not picking them up, so it is
        # shown as shut down; otherwise it is working.
        pending_commands = ztq_core.get_command_queue(name)
        status['_active'] = u'shutdown' if pending_commands else u'work'

        # Collect the threads (jobs) the worker currently runs.
        job_state = ztq_core.get_job_state(name)
        thread_rows = []
        status['_threads'] = thread_rows
        for thread_name, row in job_state.items():
            # The dump is taken before any display field is added.
            row['_detail'] = pprint.pformat(row)
            row['_name'] = thread_name
            kw = row['kw']
            process = row['process']
            row['_comment'] = kw.get('comment', process.get('comment', ''))
            row['_pid'] = process.get('pid', -1)

            ident = unicode(process.get('ident', -1))
            known_tracebacks = status['traceback']
            if ident in known_tracebacks:
                row['_thread_detail'] = pprint.pformat(known_tracebacks[ident])

            # Time the task has taken so far.
            seconds = int(time.time()) - process['start']
            if seconds > 3600:
                took = u'%.2f小时' % (seconds / 3600.0)
            elif seconds > 60:
                took = u'%.2f分钟' % (seconds / 60.0)
            else:
                took = seconds
            row['_take_time'] = took

            thread_rows.append(row)

        yield status
def config_worker(request):
    """Handle a worker management request and redirect to the status page.

    The ``action`` request parameter selects the operation on the worker
    named by the ``id`` route value:

    * ``delete`` - remove the worker's state entry and all its job records
    * ``update`` - send the worker a ``report`` command so it reports its
                   own state

    Every path ends with a redirect to ``/workerstatus``.
    """
    action = request.params.get('action', '')
    worker_id = request.matchdict['id']

    if action == 'delete':
        # Drop the worker, then each of its job records.
        workers = ztq_core.get_worker_state()
        del workers[worker_id]
        job_state = ztq_core.get_job_state(worker_id)
        for job_name, _job_status in job_state.items():
            del job_state[job_name]
    elif action == 'update':
        # Only the matching worker receives the report command.
        for worker_name in ztq_core.get_all_worker():
            if worker_name == worker_id:
                utils.send_command(worker_name, 'report')
                time.sleep(1)
                break

    return HTTPFound(location='/workerstatus')
def config_worker(request):
    """Manage a worker: delete its records or ask it for a status report.

    ``action`` is read from the request parameters and the worker name
    from the ``id`` route value.  ``delete`` removes the worker's state
    entry and its job records; ``update`` sends a ``report`` command to
    the worker.  The response is always a redirect to ``/workerstatus``.
    """
    requested = request.params.get('action', '')
    worker_id = request.matchdict['id']

    if requested == 'update':
        # Send the report command to the worker's command queue.
        candidates = ztq_core.get_all_worker()
        for candidate in candidates:
            if candidate != worker_id:
                continue
            utils.send_command(candidate, 'report')
            time.sleep(1)
            return HTTPFound(location='/workerstatus')
    elif requested == 'delete':
        # Remove the worker's state entry, then its job records.
        worker_states = ztq_core.get_worker_state()
        del worker_states[worker_id]
        jobs_of_worker = ztq_core.get_job_state(worker_id)
        for job_name, _job_status in jobs_of_worker.items():
            del jobs_of_worker[job_name]
        return HTTPFound(location='/workerstatus')

    return HTTPFound(location='/workerstatus')
class JobThread(threading.Thread):
    """Thread that watches one atomic queue and runs the tasks it yields.

    Finished tasks are pushed to the work-log queue; failed tasks are also
    logged and pushed to the runtime-error queue.
    """

    def __init__(self, queue_name, sleep_time, from_right=True):
        super(JobThread, self).__init__()
        self.queue_name = queue_name
        self.sleep_time = sleep_time
        # Which end of the server-side queue to read from (left or right).
        self.from_right = from_right
        # Setting _stop to True makes the run() loop exit.
        self._stop = False
        # Start time of the job being run; 0 when no job is running.
        self.start_job_time = 0

    def run(self):
        """Block on the queue and run each task that arrives."""
        # Re-run a job that was still recorded for this thread last time.
        unfinished = ztq_core.get_job_state(safe_get_host('server', 'alias'))
        if self.name in unfinished:
            self.start_job(unfinished[self.name])

        # Batch mode for this queue:
        #   batch_size - threshold; batch_func runs once per batch_size tasks
        #   batch_func - also runs after a partial batch once the queue
        #                yields no further task
        queue_conf = CONFIG.get("batch_queue", {}).get(self.queue_name, {})
        batch_size = queue_conf.get('batch_size', None) or -1
        batch_func = queue_conf.get('batch_func', None) or (lambda *args, **kw: -1)

        done_in_batch = 0
        pop_timeout = QUEUE_TIMEOUT

        while not self._stop:
            try:
                task = ztq_core.pop_task(
                    self.queue_name,
                    timeout=pop_timeout,
                    from_right=self.from_right,
                )
            except ztq_core.ConnectionError:
                print('ERROR: Not connected the server\n')
                task = None
                time.sleep(3)

            if task is None:
                # No follow-up task: close a partially processed batch.
                if done_in_batch > 0:
                    done_in_batch = 0
                    pop_timeout = QUEUE_TIMEOUT
                    batch_func()
                continue

            self.start_job(task)

            if batch_size > 0:
                if done_in_batch < batch_size - 1:
                    done_in_batch += 1
                    pop_timeout = -1
                else:
                    # A full batch is done.
                    done_in_batch = 0
                    pop_timeout = QUEUE_TIMEOUT
                    batch_func()

            if self.sleep_time:
                time.sleep(self.sleep_time)

    def start_job(self, task):
        """Run one task and record its outcome.

        The task's ``runtime`` record gets the worker alias, thread name and
        start time, the registered function for ``task['func']`` is called,
        and the optional ``callback`` / ``fcallback`` task is pushed on
        success / failure.  The task is always pushed to the work-log queue
        and its entry removed from the worker's job state.
        """
        started = int(time.time())
        self.start_job_time = started
        task['runtime'].update({
            'worker': safe_get_host('server', 'alias'),
            'thread': self.getName(),
            'start': started,
        })
        # Record what this thread is working on.
        task['process'] = {'ident': self.ident}
        thread_context.job = task

        try:
            self.run_task = ztq_core.task_registry[task['func']]
            # started report
            report_job(comment='start the job')
            try:
                self.run_task(*task['args'], **task['kw'])
            except TypeError:
                # "keywords must be strings" is a bug in old Python versions.
                if sys.version[:5] < '2.6.5':
                    raise Exception("We not supported %s version of python,"
                                    "Please update it to 2.6.5 or later." % sys.version[:5])
                raise

            if task.get('callback', None):
                ok_args = task.get('callback_args', ())
                ok_kw = task.get('callback_kw', {})
                ztq_core.push_task(task['callback'], *ok_args, **ok_kw)
        except Exception as e:
            reason = traceback.format_exception(*sys.exc_info())
            # Record the error on the server.
            if len(e.args) > 1:
                return_code = str(e.args[0])
            else:
                return_code = 300
            failed_runtime = task['runtime']
            failed_runtime['return'] = return_code
            failed_runtime['reason'] = reason[-11:]
            failed_runtime['end'] = int(time.time())
            ztq_core.push_runtime_error(self.queue_name, task)

            # Failure callback.
            if task.get('fcallback', None):
                fail_args = task.get('fcallback_args', ())
                fail_kw = task.get('fcallback_kw', {})
                fail_kw['return_code'] = return_code
                fail_kw['return_msg'] = reason[-1]
                ztq_core.push_task(task['fcallback'], *fail_args, **fail_kw)

            # Log the full error text.
            logger.error(''.join(reason))

        # The task is over: write the log record.
        final_runtime = task['runtime']
        final_runtime['end'] = int(time.time())
        if 'reason' not in final_runtime:
            final_runtime['return'] = 0
            final_runtime['reason'] = 'success'
        ztq_core.get_work_log_queue().push(task)

        # Remove this thread's entry from the worker's job state.
        job_state = ztq_core.get_job_state(final_runtime['worker'])
        del job_state[final_runtime['thread']]
        self.start_job_time = 0
return_code = 300 task['runtime']['return'] = return_code task['runtime']['reason'] = reason[-11:] task['runtime']['end'] = int( time.time() ) ztq_core.push_runtime_error(self.queue_name, task) # 错误回调 if task.get('fcallback', None): callback_args = task.get('fcallback_args', ()) callback_kw = task.get('fcallback_kw', {}) callback_kw['return_code'] = return_code callback_kw['return_msg'] = unicode(reason[-1], 'utf-8', 'ignore') ztq_core.push_task(task['fcallback'], *callback_args, **callback_kw) # 在终端打印错误信息 #reason.insert(0, str(datetime.datetime.today()) + '\n') logger.error(''.join(reason)) # 任务结束,记录日志 task['runtime']['end'] = int( time.time() ) ztq_core.get_work_log_queue().push(task) # 删除服务器的转换进程状态信息 job_state = ztq_core.get_job_state(task['runtime']['worker']) del job_state[task['runtime']['thread']] self.start_job_time = 0 def stop(self): """ 结束这个进程,会等待当前转换完成 请通过JobThreadManager 来完成工作线程的退出,不要直接使用这个方法 """ self._stop = True