def _check_expired_tasks(self):
    """Fail scheduled tasks whose planned run is older than their TTL.

    Generator-based coroutine (drive it with ``yield from``). The check is
    throttled: it returns immediately when less than 1000 ``now()`` units
    (one second, per the inline note) passed since the previous check.

    For every entry of ``self.scheduler_tasks_history`` that has both a
    ``next_run`` and a ``scheduled_task_id`` and whose ``next_run`` is older
    than the task TTL (``ttl`` of the configured task, otherwise
    ``SETTINGS.WORKER_TASK_TIMEOUT``) it:

    * loads the stored task; unless it is already ``Task.SUCCESSFUL`` the
      task is re-stored with status ``Task.FAILED``. A missing task, or one
      whose deserialization raises ``TypeError``, is deleted instead;
    * publishes the resulting status on the task channel;
    * resets the in-memory history entry and the record kept in
      ``SETTINGS.SCHEDULER_HISTORY_HASH``.
    """
    time_now = int(now())
    if time_now - self._ttl_check_last_run < 1000:  # 1000 = 1sec
        return
    self._ttl_check_last_run = time_now

    for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
        next_run = scheduled_task_history.get('next_run')
        task_id = scheduled_task_history.get('scheduled_task_id')
        if not next_run or not task_id:
            continue

        # The history may still hold a task that is no longer configured;
        # fall back to the default timeout instead of failing on None.
        scheduled_task = self.scheduler_tasks.get(scheduled_task_name) or {}
        ttl = scheduled_task.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT
        if (time_now - next_run) <= ttl * 1000:
            continue

        log.info('Fix broken task id={}, name={}'.format(task_id, scheduled_task_name))

        # Get task object from redis key
        key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
        task_obj = yield from self.connection.get(key)

        # Deserialize task object
        try:
            if not task_obj:
                raise TypeError()
            task = Task.deserialize(task_obj)
            if task.status != Task.SUCCESSFUL:
                # Update task object status and store it back in redis
                task = task._replace(status=Task.FAILED)
                yield from self.connection.set(key, task.serialize(),
                                               expire=SETTINGS.TASK_STORAGE_EXPIRE)
        except TypeError:
            task = None
            log.error("Wrong task id={}".format(task_id), exc_info=True)
            yield from self.connection.delete([key])

        # Publish message about finish (FAILED unless the task succeeded)
        final_status = task.status if task else Task.FAILED
        yield from self.connection.publish(
            SETTINGS.TASK_CHANNEL.format(task_id).encode('utf-8'),
            final_status.encode('utf-8'))

        # Reset the in-memory entry exactly once, before touching redis, so
        # a failing write cannot make it run twice and lose last_run.
        succeeded = bool(task) and task.status == Task.SUCCESSFUL
        if succeeded:
            scheduled_task_history['last_run'] = scheduled_task_history.get('next_run', 0)
        else:
            scheduled_task_history['scheduled_task_id'] = None
        scheduled_task_history['next_run'] = 0

        # Mirror the reset into the persisted scheduler history record
        try:
            task_scheduler_obj = yield from self.connection.hget(
                SETTINGS.SCHEDULER_HISTORY_HASH, scheduled_task_name.encode('utf-8'))
            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
            if succeeded:
                task_scheduler = task_scheduler._replace(
                    last_run=task_scheduler.next_run, next_run=0, scheduled_task_id=None)
            else:
                task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
            yield from self.connection.hset(
                SETTINGS.SCHEDULER_HISTORY_HASH,
                task_scheduler.name.encode('utf-8'),
                task_scheduler.serialize())
        except Exception:
            # We lost SCHEDULER_HISTORY_HASH in db (or the record is
            # unreadable); the in-memory entry above is already reset.
            log.error('Cannot update SchedulerTaskHistory for task name={}'.format(
                scheduled_task_name), exc_info=True)
def _cleanup_scheduled_history(self):
    """Drop stale records from the scheduler history hash, then re-arm.

    Generator-based coroutine. Reads every field of
    ``SETTINGS.SCHEDULER_HISTORY_HASH``; a record is removed when it has no
    ``next_run`` and its ``last_run`` is older than
    ``SETTINGS.SCHEDULED_HISTORY_CLEANUP_MAX_TTL * 1000`` (the factor
    suggests ``now()`` is in milliseconds and the setting in seconds --
    TODO confirm). Records that cannot be deserialized are logged and left
    in place. Finally the method schedules itself again through
    ``self.current_loop.call_later``.
    """
    log.info("Run cleanup task for table Scheduled History")
    history_hash = SETTINGS.SCHEDULER_HISTORY_HASH
    reply = yield from self.connection.hgetall(history_hash)

    for pending_item in reply:
        # Each item of the reply has to be awaited to get the (field, value) pair
        field, payload = yield from pending_item
        try:
            record = SchedulerTaskHistory.deserialize(payload)
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            log.error('Cannot deserialize SchedulerTaskHistory for {}'.format(field),
                      exc_info=True)
            continue

        if record.next_run:
            # Still has a planned run -- keep it
            continue

        age = int(now()) - record.last_run
        if age > (SETTINGS.SCHEDULED_HISTORY_CLEANUP_MAX_TTL * 1000):
            # Record is too old, remove it
            log.info('Cleanup for Scheduled History table. Remove task, name={}'.format(
                record.name))
            yield from self.connection.hdel(history_hash, [field])

    # Plan the next cleanup run
    self.current_loop.call_later(
        SETTINGS.SCHEDULED_HISTORY_CLEANUP_PERIOD,
        self._create_asyncio_task,
        self._cleanup_scheduled_history)
def _reload_config(self):
    """Reload metrics when the stored config version has changed.

    Generator-based coroutine. Throttled: does nothing when fewer than 1000
    ``now()`` units (one second, per the inline note) have passed since
    ``self._reload_metrics_config_last_run``. Otherwise it records the check
    time, reads the version from ``self.storage`` and, if it differs from
    ``self.config_version``, runs ``self._reload_metrics()`` and remembers
    the new version.
    """
    checked_at = int(now())
    elapsed = checked_at - self._reload_metrics_config_last_run
    if elapsed < 1000:  # 1000 = 1sec
        return
    self._reload_metrics_config_last_run = checked_at

    latest_version = self.storage.get_config_version()
    version_changed = latest_version != self.config_version
    if version_changed:
        yield from self._reload_metrics()
        # Only remember the version once the reload has finished
        self.config_version = latest_version
def _reload_config(self):
    """Reload triggers when the config version has changed.

    Generator-based coroutine. Throttled: does nothing when fewer than 1000
    ``now()`` units (one second, per the inline note) have passed since
    ``self._reload_triggers_config_last_run``. Otherwise it records the
    check time, reads the version from ``self.config`` and, if it differs
    from ``self.config_version``, runs ``self._reload_triggers()`` and
    remembers the new version.
    """
    checked_at = int(now())
    elapsed = checked_at - self._reload_triggers_config_last_run
    if elapsed < 1000:  # 1000 = 1sec
        return
    self._reload_triggers_config_last_run = checked_at

    latest_version = self.config.get_config_version()
    version_changed = latest_version != self.config_version
    if version_changed:
        yield from self._reload_triggers()
        # Only remember the version once the reload has finished
        self.config_version = latest_version
def _reload_config_tasks_list(self):
    """Reload the list of scheduled actions when the config version changes.

    Generator-based coroutine. Throttled: does nothing when fewer than 1000
    ``now()`` units (one second, per the inline note) have passed since
    ``self._ttl_reload_config_last_run``.

    When ``self.config.get_config_version()`` differs from
    ``self.config_version`` it:

    * replaces ``self.scheduler_tasks`` with the new actions and reloads
      the scheduler history;
    * for every history entry with a planned ``next_run`` that no longer
      matches the schedule (or whose task has no schedule any more),
      deletes the stored task and resets ``next_run`` /
      ``scheduled_task_id`` both in memory and in
      ``SETTINGS.SCHEDULER_HISTORY_HASH``. If the persisted record cannot
      be read or updated it is removed from the hash;
    * drops history entries of tasks removed from the config;
    * stores the new config version.
    """
    time_now = int(now())
    if time_now - self._ttl_reload_config_last_run < 1000:  # 1000 = 1sec
        return
    self._ttl_reload_config_last_run = time_now

    config_version = self.config.get_config_version()
    if config_version == self.config_version:
        return

    log.info('Changes in actions list, update.')
    new_scheduler_tasks = self.config.get_scheduled_actions()
    new_keys = set(new_scheduler_tasks.keys()) - set(self.scheduler_tasks.keys())
    deleted_keys = set(self.scheduler_tasks.keys()) - set(new_scheduler_tasks.keys())
    if new_keys or deleted_keys:
        log.info('New actions list, new_keys={}, deleted_keys={}'.format(new_keys, deleted_keys))
    self.scheduler_tasks = new_scheduler_tasks
    yield from self._load_scheduler_tasks_history()

    # Check scheduler_tasks_history here:
    # the run interval may have changed from a long one to a short one
    # while the stored next_run still points far into the future.
    for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
        # Look at every task that has scheduling information stored
        planned_next_run = scheduled_task_history.get('next_run', 0)
        if not planned_next_run:
            # Nothing is scheduled for this task
            continue

        if scheduled_task_name in self.scheduler_tasks:
            # The task still has a schedule
            possible_next_run = datetime_to_timestamp(
                self._get_next_run_time(scheduled_task_name,
                                        self.scheduler_tasks[scheduled_task_name],
                                        int(now())))
        else:
            # The task has no schedule any more: next_run must go back to 0
            possible_next_run = 0

        if planned_next_run == possible_next_run:
            continue

        # Cancel scheduled task and reset next_run
        task_id = scheduled_task_history.get('scheduled_task_id')
        log.info('Schedule changed for task with id={}, name={}, reschedule next_task'.format(
            task_id, scheduled_task_name))
        key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
        yield from self.connection.delete([key])
        scheduled_task_history['next_run'] = 0
        # None (not 0) to match the value written to the persisted record
        scheduled_task_history['scheduled_task_id'] = None

        history_field = scheduled_task_name.encode('utf-8')
        try:
            task_scheduler_obj = yield from self.connection.hget(
                SETTINGS.SCHEDULER_HISTORY_HASH, history_field)
            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
            task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
            yield from self.connection.hset(
                SETTINGS.SCHEDULER_HISTORY_HASH,
                task_scheduler.name.encode('utf-8'),
                task_scheduler.serialize())
        except Exception:
            log.error('Broken SchedulerTaskHistory object for task id={}, delete it'.format(
                scheduled_task_name), exc_info=True)
            # Use the task name we already know: the deserialized record may
            # not exist at this point. hdel is given a list of fields, as in
            # the other hdel call in this file.
            yield from self.connection.hdel(SETTINGS.SCHEDULER_HISTORY_HASH, [history_field])

    # Was a task removed from the config? Stop tracking its execution.
    for key in deleted_keys:
        self.scheduler_tasks_history.pop(key, None)
    self.config_version = config_version
def loop(self):
    """Run the scheduler main loop until ``self.run`` becomes false.

    Generator-based coroutine. After ``self.bootstrap()`` each pass:

    1. reloads the task list if the config changed (throttled inside);
    2. refreshes the scheduler run history;
    3. fails expired tasks;
    4. schedules every task whose ``next_run`` is not later than its
       ``last_run`` (i.e. nothing is currently planned for it);
    5. pings the dispatcher;
    6. waits on ``self.sleep_task`` for whatever is left of
       ``SETTINGS.SCHEDULER_PULL_TIMEOUT``.

    An ordinary exception in a pass is logged and the loop goes on, but
    the wait in step 6 still happens so a persistent failure cannot turn
    the loop into a busy spin. ``GeneratorExit`` and task cancellation end
    the loop. On exit the event loop is stopped and the connection closed.
    """
    yield from self.bootstrap()
    while self.run:
        t_start = time.time()
        try:
            # May be reload config (limited to 1 per second)
            yield from self._reload_config_tasks_list()
            # Refresh scheduler run history
            yield from self._load_scheduler_tasks_history()
            # Kill expired tasks (broken worker)
            yield from self._check_expired_tasks()

            current_time = now()
            # Iterate over all recurrent tasks
            for scheduler_task_name, scheduler_task in self.scheduler_tasks.items():
                scheduled_task_history = self.scheduler_tasks_history[scheduler_task_name]
                if (scheduled_task_history.get('next_run', 0) <=
                        scheduled_task_history.get('last_run', 0)):
                    log.debug('Got unscheduled task {}'.format(scheduler_task_name))
                    # Task is not scheduled/executed now, so need to schedule
                    next_run_dt = self._get_next_run_time(
                        scheduler_task_name, scheduler_task, int(current_time))
                    log.debug('Next run {} for task {}'.format(next_run_dt, scheduler_task_name))
                    yield from self.schedule_task(
                        name=scheduler_task_name,
                        task_type=Task.TYPE_REGULAR,
                        run_at=next_run_dt,
                        ttl=scheduler_task.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                        kwargs=scheduler_task)

            yield from self._ping_disptacher()
        except (GeneratorExit, asyncio.CancelledError):
            # Shutdown request: must be listed before `Exception` because
            # CancelledError is an Exception subclass on older Pythons.
            break
        except Exception:
            log.error("Unexpected error in scheduler loop!", exc_info=True)

        # Sleep for timeout or new push from scheduler. Done outside the
        # work `try` so that a failed pass is throttled as well.
        delay = SETTINGS.SCHEDULER_PULL_TIMEOUT - (time.time() - t_start)
        if delay > 0:
            try:
                yield from asyncio.wait([self.sleep_task], timeout=delay)
            except (GeneratorExit, asyncio.CancelledError):
                break
            except Exception:
                log.error("Unexpected error in scheduler loop!", exc_info=True)

    self.current_loop.stop()
    self.connection.close()
    log.info('Bye-bye!')
def loop(self):
    """Run the scheduler main loop until ``self.run`` becomes false.

    Generator-based coroutine. After ``self.bootstrap()`` each pass:

    1. reloads the task list if the config changed (throttled inside);
    2. refreshes the scheduler run history;
    3. fails expired tasks;
    4. schedules every task whose ``next_run`` is not later than its
       ``last_run`` (i.e. nothing is currently planned for it);
    5. pings the dispatcher;
    6. waits on ``self.sleep_task`` for whatever is left of
       ``SETTINGS.SCHEDULER_PULL_TIMEOUT``.

    An ordinary exception in a pass is logged and the loop goes on, but
    the wait in step 6 still happens so a persistent failure cannot turn
    the loop into a busy spin. ``GeneratorExit`` and task cancellation end
    the loop. On exit the event loop is stopped and the connection closed.
    """
    yield from self.bootstrap()
    while self.run:
        t_start = time.time()
        try:
            # May be reload config (limited to 1 per second)
            yield from self._reload_config_tasks_list()
            # Refresh scheduler run history
            yield from self._load_scheduler_tasks_history()
            # Kill expired tasks (broken worker)
            yield from self._check_expired_tasks()

            current_time = now()
            # Iterate over all recurrent tasks
            for scheduler_task_name, scheduler_task in self.scheduler_tasks.items():
                scheduled_task_history = self.scheduler_tasks_history[scheduler_task_name]
                if (scheduled_task_history.get('next_run', 0) <=
                        scheduled_task_history.get('last_run', 0)):
                    log.debug('Got unscheduled task {}'.format(scheduler_task_name))
                    # Task is not scheduled/executed now, so need to schedule
                    next_run_dt = self._get_next_run_time(
                        scheduler_task_name, scheduler_task, int(current_time))
                    log.debug('Next run {} for task {}'.format(next_run_dt, scheduler_task_name))
                    yield from self.schedule_task(
                        name=scheduler_task_name,
                        task_type=Task.TYPE_REGULAR,
                        run_at=next_run_dt,
                        ttl=scheduler_task.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                        kwargs=scheduler_task)

            yield from self._ping_disptacher()
        except (GeneratorExit, asyncio.CancelledError):
            # Shutdown request: must be listed before `Exception` because
            # CancelledError is an Exception subclass on older Pythons.
            break
        except Exception:
            log.error("Unexpected error in scheduler loop!", exc_info=True)

        # Sleep for timeout or new push from scheduler. Done outside the
        # work `try` so that a failed pass is throttled as well.
        delay = SETTINGS.SCHEDULER_PULL_TIMEOUT - (time.time() - t_start)
        if delay > 0:
            try:
                yield from asyncio.wait([self.sleep_task], timeout=delay)
            except (GeneratorExit, asyncio.CancelledError):
                break
            except Exception:
                log.error("Unexpected error in scheduler loop!", exc_info=True)

    self.current_loop.stop()
    self.connection.close()
    log.info('Bye-bye!')
def _cleanup_scheduled_history(self):
    """Drop stale records from the scheduler history hash, then re-arm.

    Generator-based coroutine. Reads every field of
    ``SETTINGS.SCHEDULER_HISTORY_HASH``; a record is removed when it has no
    ``next_run`` and its ``last_run`` is older than
    ``SETTINGS.SCHEDULED_HISTORY_CLEANUP_MAX_TTL * 1000`` (the factor
    suggests ``now()`` is in milliseconds and the setting in seconds --
    TODO confirm). Records that cannot be deserialized are logged and left
    in place. Finally the method schedules itself again through
    ``self.current_loop.call_later``.
    """
    log.info("Run cleanup task for table Scheduled History")
    history_hash = SETTINGS.SCHEDULER_HISTORY_HASH
    reply = yield from self.connection.hgetall(history_hash)

    for pending_item in reply:
        # Each item of the reply has to be awaited to get the (field, value) pair
        field, payload = yield from pending_item
        try:
            record = SchedulerTaskHistory.deserialize(payload)
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            log.error('Cannot deserialize SchedulerTaskHistory for {}'.format(field),
                      exc_info=True)
            continue

        if record.next_run:
            # Still has a planned run -- keep it
            continue

        age = int(now()) - record.last_run
        if age > (SETTINGS.SCHEDULED_HISTORY_CLEANUP_MAX_TTL * 1000):
            # Record is too old, remove it
            log.info('Cleanup for Scheduled History table. Remove task, name={}'.format(
                record.name))
            yield from self.connection.hdel(history_hash, [field])

    # Plan the next cleanup run
    self.current_loop.call_later(
        SETTINGS.SCHEDULED_HISTORY_CLEANUP_PERIOD,
        self._create_asyncio_task,
        self._cleanup_scheduled_history)
def _check_expired_tasks(self):
    """Fail scheduled tasks whose planned run is older than their TTL.

    Generator-based coroutine (drive it with ``yield from``). The check is
    throttled: it returns immediately when less than 1000 ``now()`` units
    (one second, per the inline note) passed since the previous check.

    For every entry of ``self.scheduler_tasks_history`` that has both a
    ``next_run`` and a ``scheduled_task_id`` and whose ``next_run`` is older
    than the task TTL (``ttl`` of the configured task, otherwise
    ``SETTINGS.WORKER_TASK_TIMEOUT``) it:

    * loads the stored task; unless it is already ``Task.SUCCESSFUL`` the
      task is re-stored with status ``Task.FAILED``. A missing task, or one
      whose deserialization raises ``TypeError``, is deleted instead;
    * publishes the resulting status on the task channel;
    * resets the in-memory history entry and the record kept in
      ``SETTINGS.SCHEDULER_HISTORY_HASH``.
    """
    time_now = int(now())
    if time_now - self._ttl_check_last_run < 1000:  # 1000 = 1sec
        return
    self._ttl_check_last_run = time_now

    for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
        next_run = scheduled_task_history.get('next_run')
        task_id = scheduled_task_history.get('scheduled_task_id')
        if not next_run or not task_id:
            continue

        # The history may still hold a task that is no longer configured;
        # fall back to the default timeout instead of failing on None.
        scheduled_task = self.scheduler_tasks.get(scheduled_task_name) or {}
        ttl = scheduled_task.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT
        if (time_now - next_run) <= ttl * 1000:
            continue

        log.info('Fix broken task id={}, name={}'.format(task_id, scheduled_task_name))

        # Get task object from redis key
        key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
        task_obj = yield from self.connection.get(key)

        # Deserialize task object
        try:
            if not task_obj:
                raise TypeError()
            task = Task.deserialize(task_obj)
            if task.status != Task.SUCCESSFUL:
                # Update task object status and store it back in redis
                task = task._replace(status=Task.FAILED)
                yield from self.connection.set(key, task.serialize(),
                                               expire=SETTINGS.TASK_STORAGE_EXPIRE)
        except TypeError:
            task = None
            log.error("Wrong task id={}".format(task_id), exc_info=True)
            yield from self.connection.delete([key])

        # Publish message about finish (FAILED unless the task succeeded)
        final_status = task.status if task else Task.FAILED
        yield from self.connection.publish(
            SETTINGS.TASK_CHANNEL.format(task_id).encode('utf-8'),
            final_status.encode('utf-8'))

        # Reset the in-memory entry exactly once, before touching redis, so
        # a failing write cannot make it run twice and lose last_run.
        succeeded = bool(task) and task.status == Task.SUCCESSFUL
        if succeeded:
            scheduled_task_history['last_run'] = scheduled_task_history.get('next_run', 0)
        else:
            scheduled_task_history['scheduled_task_id'] = None
        scheduled_task_history['next_run'] = 0

        # Mirror the reset into the persisted scheduler history record
        try:
            task_scheduler_obj = yield from self.connection.hget(
                SETTINGS.SCHEDULER_HISTORY_HASH, scheduled_task_name.encode('utf-8'))
            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
            if succeeded:
                task_scheduler = task_scheduler._replace(
                    last_run=task_scheduler.next_run, next_run=0, scheduled_task_id=None)
            else:
                task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
            yield from self.connection.hset(
                SETTINGS.SCHEDULER_HISTORY_HASH,
                task_scheduler.name.encode('utf-8'),
                task_scheduler.serialize())
        except Exception:
            # We lost SCHEDULER_HISTORY_HASH in db (or the record is
            # unreadable); the in-memory entry above is already reset.
            log.error('Cannot update SchedulerTaskHistory for task name={}'.format(
                scheduled_task_name), exc_info=True)
def _reload_config_tasks_list(self):
    """Reload the list of scheduled actions when the config version changes.

    Generator-based coroutine. Throttled: does nothing when fewer than 1000
    ``now()`` units (one second, per the inline note) have passed since
    ``self._ttl_reload_config_last_run``.

    When ``self.config.get_config_version()`` differs from
    ``self.config_version`` it:

    * replaces ``self.scheduler_tasks`` with the new actions and reloads
      the scheduler history;
    * for every history entry with a planned ``next_run`` that no longer
      matches the schedule (or whose task has no schedule any more),
      deletes the stored task and resets ``next_run`` /
      ``scheduled_task_id`` both in memory and in
      ``SETTINGS.SCHEDULER_HISTORY_HASH``. If the persisted record cannot
      be read or updated it is removed from the hash;
    * drops history entries of tasks removed from the config;
    * stores the new config version.
    """
    time_now = int(now())
    if time_now - self._ttl_reload_config_last_run < 1000:  # 1000 = 1sec
        return
    self._ttl_reload_config_last_run = time_now

    config_version = self.config.get_config_version()
    if config_version == self.config_version:
        return

    log.info('Changes in actions list, update.')
    new_scheduler_tasks = self.config.get_scheduled_actions()
    new_keys = set(new_scheduler_tasks.keys()) - set(self.scheduler_tasks.keys())
    deleted_keys = set(self.scheduler_tasks.keys()) - set(new_scheduler_tasks.keys())
    if new_keys or deleted_keys:
        log.info('New actions list, new_keys={}, deleted_keys={}'.format(new_keys, deleted_keys))
    self.scheduler_tasks = new_scheduler_tasks
    yield from self._load_scheduler_tasks_history()

    # Check scheduler_tasks_history here:
    # the run interval may have changed from a long one to a short one
    # while the stored next_run still points far into the future.
    for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
        # Look at every task that has scheduling information stored
        planned_next_run = scheduled_task_history.get('next_run', 0)
        if not planned_next_run:
            # Nothing is scheduled for this task
            continue

        if scheduled_task_name in self.scheduler_tasks:
            # The task still has a schedule
            possible_next_run = datetime_to_timestamp(
                self._get_next_run_time(scheduled_task_name,
                                        self.scheduler_tasks[scheduled_task_name],
                                        int(now())))
        else:
            # The task has no schedule any more: next_run must go back to 0
            possible_next_run = 0

        if planned_next_run == possible_next_run:
            continue

        # Cancel scheduled task and reset next_run
        task_id = scheduled_task_history.get('scheduled_task_id')
        log.info('Schedule changed for task with id={}, name={}, reschedule next_task'.format(
            task_id, scheduled_task_name))
        key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
        yield from self.connection.delete([key])
        scheduled_task_history['next_run'] = 0
        # None (not 0) to match the value written to the persisted record
        scheduled_task_history['scheduled_task_id'] = None

        history_field = scheduled_task_name.encode('utf-8')
        try:
            task_scheduler_obj = yield from self.connection.hget(
                SETTINGS.SCHEDULER_HISTORY_HASH, history_field)
            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
            task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
            yield from self.connection.hset(
                SETTINGS.SCHEDULER_HISTORY_HASH,
                task_scheduler.name.encode('utf-8'),
                task_scheduler.serialize())
        except Exception:
            log.error('Broken SchedulerTaskHistory object for task id={}, delete it'.format(
                scheduled_task_name), exc_info=True)
            # Use the task name we already know: the deserialized record may
            # not exist at this point. hdel is given a list of fields, as in
            # the other hdel call in this file.
            yield from self.connection.hdel(SETTINGS.SCHEDULER_HISTORY_HASH, [history_field])

    # Was a task removed from the config? Stop tracking its execution.
    for key in deleted_keys:
        self.scheduler_tasks_history.pop(key, None)
    self.config_version = config_version